Corpus overview plots

%load_ext autoreload
%autoreload 2
import os
from git import Repo
import dimcat as dc
import ms3
# Path to the local clone of the corpus. The "~" is expanded right here because
# os.path.join()/os.makedirs() further down treat it as a literal directory
# name and would otherwise create "./~/romantic_piano_corpus/figures".
corpus_path = os.path.expanduser("~/romantic_piano_corpus")
repo = Repo(corpus_path)
# The repository containing this notebook is looked up starting from the
# current working directory, searching parent directories as well.
notebook_repo = Repo('.', search_parent_directories=True)
notebook_repo_path = notebook_repo.git.rev_parse("--show-toplevel")
# Report the abbreviated commit hashes and the library versions of this run.
print(f"Notebook repository '{os.path.basename(notebook_repo_path)}' @ {notebook_repo.commit().hexsha[:7]}")
print(f"Data repo '{os.path.basename(corpus_path)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Notebook repository 'dimcat' @ 10cf791
Data repo 'romantic_piano_corpus' @ c3ac88c
dimcat version 0.2.0.post1.dev109+g2332fcf.d20230210
ms3 version 1.2.3
from fractions import Fraction
from IPython.display import HTML
import plotly.express as px
import colorlover
import pandas as pd
# Show up to 100 columns when displaying DataFrames.
pd.options.display.max_columns = 100
# Layout settings shared by the figures: white backgrounds, tight margins and
# a font size of 15.
STD_LAYOUT = dict(
    paper_bgcolor='#FFFFFF',
    plot_bgcolor='#FFFFFF',
    margin=dict(l=40, r=0, b=0, t=40, pad=0),
    font=dict(size=15),
)
# Machine-specific alternative, kept for reference:
#OUTPUT_DIR = "/home/hentsche/Documents/phd/romantic_piano_corpus_report/figures/"
# Output directory inside the corpus folder; it is created if it is missing.
OUTPUT_DIR = os.path.join(corpus_path, 'figures')
os.makedirs(name=OUTPUT_DIR, exist_ok=True)
# Uncomment to render every colorlover scale instead of only 'Paired':
#HTML(colorlover.to_html(colorlover.scales))
paired_scale = colorlover.scales['9']['qual']['Paired']
HTML(colorlover.to_html(paired_scale))
# Show plotly's qualitative colour swatches.
fig = px.colors.qualitative.swatches()
fig.show()
# Colour sequence stored for later use.
corpus_color_scale = px.colors.qualitative.D3

Overview

# Create an empty dimcat Dataset and load the corpus located at `corpus_path`.
dataset = dc.Dataset()
dataset.load(directory=corpus_path)
# Bare expression: in a notebook this displays the loaded data object.
dataset.data
[default|all]
All corpora
-----------
View: This view is called 'default'. It
	- excludes fnames that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml), and
	- excludes review files and folders.

                               has   active   scores measures           notes        expanded
                          metadata     view detected detected parsed detected parsed detected parsed
corpus
beethoven_piano_sonatas        yes  default       87       87     87       87     87       64     64
chopin_mazurkas                yes  default       55       55     55       55     55       55     55
debussy_suite_bergamasque      yes  default        4        4      4        4      4        4      4
dvorak_silhouettes             yes  default       12       12     12       12     12       12     12
grieg_lyric_pieces             yes  default       66       66     66       66     66       66     66
liszt_pelerinage               yes  default       19       19     19       19     19       19     19
medtner_tales                  yes  default       19       19     19       19     19       19     19
schumann_kinderszenen          yes  default       13       13     13       13     13       13     13
tchaikovsky_seasons            yes  default       12       12     12       12     12       12     12

824/2236 files are excluded from this view.

792 files have been excluded based on their subdir.
32 files have been excluded based on their file name.


There are 2 orphans that could not be attributed to any of the respective corpus's fnames.

Metadata

# Metadata table of the loaded dataset.
all_metadata = dataset.data.metadata()
n_metadata_rows = len(all_metadata)
n_pieces = len(dataset.pieces)
print(f"Concatenated 'metadata.tsv' files cover {n_metadata_rows} of the {n_pieces} scores.")
# Display the first metadata row of every group on index level 0.
all_metadata.groupby(level=0).nth(0)
Concatenated 'metadata.tsv' files cover 287 of the 287 scores.
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers composed_start composed_end composed_source composer workTitle movementNumber movementTitle workNumber poet lyricist arranger copyright creationDate mscVersion platform source translator title_text subtitle_text lyricist_text composer_text musescore ms3_version subdirectory rel_path has_drumset ambitus imslp musicbrainz viaf wikidata originalFormat staff_1_ambitus staff_1_instrument staff_2_ambitus staff_2_instrument score_integrity imslp.1 key mode typesetter text pdf score integrity comments staff_3_ambitus staff_3_instrument PDF staff_4_ambitus staff_4_instrument
corpus
beethoven_piano_sonatas 1: 2/2 1: -4 154 152 608.0 308.0 304.0 1216.0 NaN 1476.00 1679 985 0 0 241 f 2.3.0 Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) AN 1793 1795 OxfordMusicOnline Ludwig van Beethoven Sonata no. 1 1 Allegro op.2/1 NaN NaN NaN NaN 2019-03-05 3.02 Apple Macintosh NaN NaN Sonata no. 1 1. Allegro NaN Ludwig van Beethoven 3.6.2 1.1.1 MS3 MS3/01-1.mscx False 32-89 (Ab1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/a78520e0-0211-3b5... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 xml 51-89 (Eb3-F6) piano 32-73 (Ab1-Db5) piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
chopin_mazurkas 1: 3/4 1: 2 65 64 193.0 65.0 64.0 193.0 NaN 711.00 810 274 0 0 116 b 2.3.0 Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0),... JH, AN, DK 1837 1837 OxfordMusicOnline Frédéric Chopin Mazurkas 2 NaN Op. 30 NaN NaN NaN NaN 2019-02-08 3.02 Apple Macintosh https://github.com/craigsapp/chopin-mazurkas NaN Mazurkas, Op. 30 Mazurka in b, Op. 30, no. 2 NaN Frédéric Chopin 3.6.2 1.1.1 MS3 MS3/BI105-2op30-2.mscx False 35-90 (B1-F#6) https://imslp.org/wiki/Mazurkas%2C_Op.30_(Chop... https://musicbrainz.org/work/13e317ea-5e50-3d5... NaN https://www.wikidata.org/wiki/Q6799054 xml 59-90 (B3-F#6) piano 35-71 (B1-B4) piano Cédric Koller NaN B minor / F sharp minor NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
debussy_suite_bergamasque 1: 4/4 1: -1 89 89 356.0 89.0 89.0 356.0 NaN 1533.67 1721 870 0 0 274 F 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) AB, AN 1890 1905 Oxford Music Online Claude Debussy Suite Bergamasque 1 Prelude L.75 NaN NaN NaN NaN 2015-05-19 3.02 Microsoft Windows http://musescore.com/score/890041 NaN Suite Bergamasque 1. Prelude NaN Claude Debussy 3.6.2 1.1.1 MS3 MS3/l075-01_suite_prelude.mscx False 24-94 (C1-Bb6) https://imslp.org/wiki/Suite_bergamasque_(Debu... https://musicbrainz.org/work/fe4cfa64-156a-3d7... https://viaf.org/viaf/177398380 https://www.wikidata.org/wiki/Q29117932 NaN 48-94 (C3-Bb6) Piano 24-90 (C1-F#6) Piano NaN NaN NaN NaN NaN <b>Prélude</b> NaN NaN NaN NaN NaN NaN NaN NaN
dvorak_silhouettes 1: 6/8 1: 4, 7: -5, 49: 4 54 52 156.5 54.0 52.0 156.5 NaN 658.75 957 288 0 0 80 c# 2.3.0 Daniel Grote (2.1.1), Hanné Becker (2.3.0) Johannes Hentschel (2.1.1), AN 1875 1879 OxfordMusicOnline Antonín Dvořák Silhouettes 1 Allegro feroce op. 8 NaN NaN NaN NaN 2018-05-26 3.02 Microsoft Windows NaN NaN Silhouettes, op. 8 1. Allegro feroce NaN Antonín Dvořák 3.6.2 1.1.1 MS3 MS3/op08n01.mscx False 32-92 (G#1-Ab6) https://imslp.org/wiki/Silhouettes%2C_Op.8_(Dv... https://musicbrainz.org/work/80bb714e-a36a-425... https://viaf.org/viaf/174794325/ NaN xml 56-92 (G#3-Ab6) Piano 32-68 (G#1-G#4) Piano NaN NaN NaN NaN NaN NaN https://imslp.org/wiki/Special:ReverseLookup/5... Tom Schreyer NaN NaN NaN NaN NaN NaN
grieg_lyric_pieces 1: 2/4 1: -3 23 23 46.0 23.0 23.0 46.0 NaN 135.50 268 156 0 0 43 Eb 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.30) Adrian Nagel 1864 1867 OxfordMusicOnline Edvard Grieg Lyric Pieces 1 Arietta Op. 12 NaN NaN NaN NaN 2018-10-11 3.02 Microsoft Windows NaN NaN Lyric Pieces, Op. 12 1. Arietta NaN Edvard Grieg 3.6.2 1.1.1 MS3 MS3/op12n01.mscx False 39-79 (Eb2-G5) https://imslp.org/wiki/Lyric_Pieces,_Op.12_(Gr... https://musicbrainz.org/work/b6115546-141a-336... NaN https://www.wikidata.org/wiki/Q2304758 mxl 55-79 (G3-G5) NaN 39-71 (Eb2-Cb5) NaN Tom Schreyer NaN NaN NaN NaN NaN https://imslp.eu/files/imglnks/euimg/8/8e/IMSL... NaN NaN NaN NaN NaN NaN NaN
liszt_pelerinage 1: 4/4 1: 0 97 97 388.0 97.0 97.0 388.0 NaN 1902.42 2879 1069 0 0 174 C 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) Johannes Hentschel (1-33 & 82-97), AB, AN 1848 1855 OxfordMusicOnline Franz Liszt Années de Pèlerinage, Première année: Suisse 1 Chapelle de Guillaume Tell S.160 NaN NaN NaN NaN 2019-01-26 3.02 Microsoft Windows https://musescore.com/score/3987861 NaN Années de Pèlerinage, Première année: Suisse, ... <font size="18"/>1. CHAPELLE DE GUILLAUME TELL... NaN Franz Liszt 3.6.2 1.1.1 MS3 MS3/160.01_Chapelle_de_Guillaume_Tell.mscx False 24-96 (C1-C7) https://imslp.org/wiki/Ann%C3%A9es_de_p%C3%A8l... https://musicbrainz.org/work/5804701d-54a6-4c9... https://viaf.org/viaf/179020308/ https://www.wikidata.org/wiki/Q567462 xml 40-96 (E2-C7) Piano 24-79 (C1-G5) Piano Tom Schreyer NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN https://imslp.org/wiki/Special:ReverseLookup/1... NaN NaN
medtner_tales 1: 4/8 1: -3 81 81 162.0 81.0 81.0 162.0 NaN 603.00 1481 528 0 0 213 c 2.2.0 Wendelin Bitzan Adrian Nagel 1904 1905 OxfordMusicOnline Nikolai Medtner Tales 1 Andantino op.8 NaN NaN NaN NaN 2017-10-21 3.02 Apple Macintosh NaN NaN Tales, op.8 1. Andantino NaN Nikolai Medtner 3.6.2 1.1.1 MS3 MS3/op08n01.mscx False 22-87 (Bb0-Eb6) https://imslp.org/wiki/2_Tales%2C_Op.8_(Medtne... https://musicbrainz.org/work/0cdc7080-06b8-33d... https://viaf.org/viaf/2467165271623010690003 NaN NaN 47-87 (B2-Eb6) Piano 22-77 (Bb0-F5) Piano Tom Schreyer NaN NaN NaN NaN NaN https://imslp.org/wiki/Special:ReverseLookup/5790 NaN NaN NaN NaN NaN NaN NaN
schumann_kinderszenen 1: 2/4 1: 1 22 22 44.0 44.0 44.0 88.0 NaN 134.33 241 141 0 0 44 G 2.3.0 Tal Soker (2.1.1), John Heilig (2.3.0) AN, JHei, JH 1838 1839 OxfordMusicOnline Robert Schumann Kinderszenen 1 Von fremden Ländern und Menschen Op.15 NaN NaN NaN NaN 2017-03-11 3.02 Microsoft Windows http://musescore.com/user/22249306/scores/4778176 NaN Von fremden Ländern und Menschen\n(Of Foreign ... NaN NaN Robert Schumann 3.6.2 1.1.1 MS3 MS3/n01.mscx False 42-79 (F#2-G5) https://imslp.org/wiki/Kinderszenen,_Op.15_(Sc... https://musicbrainz.org/work/04bf8808-7a43-30e... https://viaf.org/viaf/174865068/ https://www.wikidata.org/wiki/Q1569982 NaN 62-79 (D4-G5) Piano 42-69 (F#2-A4) Piano Tom Schreyer NaN NaN NaN NaN NaN https://imslp.org/wiki/Special:ReverseLookup/6... NaN NaN NaN NaN NaN NaN NaN
tchaikovsky_seasons 1: 3/4 1: 3, 29: 1, 63: 3 103 103 309.0 103.0 103.0 309.0 NaN 1058.17 1537 829 0 0 313 A 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.3.0) Johannes Hentschel, AN 1875 1876 OxfordMusicOnline Pyotr Ilyich Tchaikovsky The Seasons 1 January: At the Fireside Op. 37a NaN NaN NaN NaN 11/29/18 3.02 Linux http://musescore.com/user/12839876/scores/3444321 NaN 1. January: At the Fireside from: <i>The Seasons</i>, op. 37a «И мирной неги уголок\nНочь сумраком одела,\nВ... Pyotr Ilyich Tchaikovsky 3.6.2 1.1.1 MS3 MS3/op37a01.mscx False 33-88 (A1-E6) https://imslp.org/wiki/The_Seasons,_Op.37a_(Tc... https://musicbrainz.org/work/6460a645-9844-304... https://viaf.org/viaf/183857288 https://www.wikidata.org/wiki/Q2914902 mxl 53-88 (E#3-E6) Piano 33-88 (A1-E6) Piano NaN NaN NaN NaN NaN NaN https://imslp.org/wiki/Special:ReverseLookup/1... Tom Schreyer NaN NaN NaN NaN NaN NaN
# How often does each distinct 'annotators' entry occur in the metadata?
print("VALUE COUNTS OF THE COLUMN 'annotators'")
annotator_entries = all_metadata['annotators']
annotator_entries.value_counts()
VALUE COUNTS OF THE COLUMN 'annotators'
Adrian Nagel (2.1.1), John Heilig (2.3.0)                               75
Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0), Davor Krkljus (2.3.0)    49
Wendelin Bitzan                                                         20
Adrian Nagel (2.3.0)                                                    16
Adrian Nagel                                                            14
Tal Soker (2.1.1), John Heilig (2.3.0)                                  13
Lydia Carlisi (2.2.0), Adrian Nagel (2.3.0)                             11
Adrian Nagel (2.1.1), Hanné Becker (2.3.0)                               9
Adrian Nagel (2.1.1), Amelia Brey (2.3.0)                                9
Adrian Nagel (2.2.0), Amelia Brey (2.3.0)                                6
Daniel Grote (2.2.0), Adrian Nagel (2.3.0)                               6
Adrian Nagel (2.2.0), Hanné Becker (2.3.0)                               5
Lydia Carlisi (2.2.0), Victor Zheng (2.3.0)                              4
Wendelin Bitzan (1.0.0), Adrian Nagel (2.3.0)                            4
Lydia Carlisi (2.2.0), John Heilig (2.3.0)                               3
Daniel Grote (2.1.1), Hanné Becker (2.3.0)                               3
Lydia Carlisi (2.2.0), Amelia Brey (2.3.0)                               2
Daniel Grote (2.2.0), Hanné Becker (2.3.0)                               2
Adrian Nagel (2.2.0), John Heilig (2.3.0)                                2
Adrian Nagel (2.1.0), John Heilig (2.3.0)                                2
Adrian Nagel (2.2.0), Victor Zheng (2.3.0)                               2
Adrian Nagel (2.1.1), John Heilig (2.30)                                 1
Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0)                             1
Wendelin Bitzan (2.1.1), AJW (2.3.0), Davor Krkljus (2.3.0)              1
Lars & Ya-Chuan                                                          1
Daniel Grote (2.2.0), John Heilig (2.3.0)                                1
Gabriele Ortiz Würth (2.2.0), Adrian Nagel (2.3.0)                       1
Adrian Nagel, Victor Zheng (2.3.0)                                       1
Name: annotators, dtype: int64
# Earliest composition start and latest composition end, each together with
# the index label of the row where it occurs.
earliest_year = all_metadata.composed_start.min()
earliest_piece = all_metadata.composed_start.idxmin()
latest_year = all_metadata.composed_end.max()
latest_piece = all_metadata.composed_end.idxmax()
print(f"Composition dates range from {earliest_year} {earliest_piece} "
      f"to {latest_year} {latest_piece}.")
Composition dates range from 1793 ('beethoven_piano_sonatas', '01-1') to 1925 ('medtner_tales', 'op48n01').
# Apply dimcat's IsAnnotatedFilter to the dataset and report how many IDs
# remain afterwards.
annotation_filter = dc.IsAnnotatedFilter()
annotated = annotation_filter.process_data(dataset)
ids_before = dataset.n_indices
ids_after = annotated.n_indices
print(f"Before: {ids_before} IDs, after filtering: {ids_after}")
Before: 287 IDs, after filtering: 264

Choose here if you want to see stats for all or only for annotated scores.

# Switch between the full dataset and the annotated subset by (un)commenting
# one of the two assignments; the cells below read `selected`.
#selected = dataset
selected = annotated

Measures

# Measures facet of the selected data; one group per pair of the first two
# index levels.
all_measures = selected.get_facet('measures')
n_measures = len(all_measures.index)
n_measure_files = len(all_measures.groupby(level=[0,1]))
print(f"{n_measures} measures over {n_measure_files} files.")
all_measures.head()
29956 measures over 264 files.
mc mn quarterbeats duration_qb keysig timesig act_dur mc_offset numbering_offset dont_count barline breaks repeats next markers jump_bwd jump_fwd play_until quarterbeats_all_endings volta
corpus fname interval
beethoven_piano_sonatas 01-1 [0.0, 1.0) 1 0 0 1.0 -4 2/2 1/4 3/4 <NA> 1 NaN <NA> firstMeasure (2,) NaN NaN NaN NaN NaN <NA>
[1.0, 5.0) 2 1 1 4.0 -4 2/2 1 0 <NA> <NA> NaN <NA> NaN (3,) NaN NaN NaN NaN NaN <NA>
[5.0, 9.0) 3 2 5 4.0 -4 2/2 1 0 <NA> <NA> NaN <NA> NaN (4,) NaN NaN NaN NaN NaN <NA>
[9.0, 13.0) 4 3 9 4.0 -4 2/2 1 0 <NA> <NA> NaN <NA> NaN (5,) NaN NaN NaN NaN NaN <NA>
[13.0, 17.0) 5 4 13 4.0 -4 2/2 1 0 <NA> <NA> NaN <NA> NaN (6,) NaN NaN NaN NaN NaN <NA>
# Count the rows per time signature; missing values are counted as well.
print("Distribution of time signatures per XML measure (MC):")
all_measures['timesig'].value_counts(dropna=False)
Distribution of time signatures per XML measure (MC):
3/4      10130
2/4       6073
4/4       5186
6/8       3432
2/2       2523
3/8       1278
12/8       351
9/8        304
6/4        283
9/16       148
4/8         81
2/8         75
12/16       43
12/32       17
6/16        17
17/4         3
33/32        2
15/4         1
14/4         1
10/4         1
10/8         1
12/4         1
7/4          1
5/4          1
3/2          1
11/4         1
18/4         1
Name: timesig, dtype: int64

Notes

# Notes facet of the selected data; one group per pair of the first two
# index levels.
all_notes = selected.get_facet('notes')
n_notes = len(all_notes.index)
n_note_files = len(all_notes.groupby(level=[0,1]))
print(f"{n_notes} notes over {n_note_files} files.")
all_notes.head()
434219 notes over 264 files.
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice duration gracenote nominal_duration scalar tied tpc midi name octave chord_id volta tremolo
corpus fname interval
beethoven_piano_sonatas 01-1 [0.0, 1.0) 1 0 0 1.0 0 3/4 2/2 1 1 1/4 NaN 1/4 1 <NA> 0 60 C4 4 0 <NA> NaN
[1.0, 2.0) 2 1 1 1.0 0 0 2/2 1 1 1/4 NaN 1/4 1 <NA> -1 65 F4 4 1 <NA> NaN
[2.0, 3.0) 2 1 2 1.0 1/4 1/4 2/2 1 1 1/4 NaN 1/4 1 <NA> -4 68 Ab4 4 2 <NA> NaN
[3.0, 4.0) 2 1 3 1.0 1/2 1/2 2/2 1 1 1/4 NaN 1/4 1 <NA> 0 72 C5 5 3 <NA> NaN
[4.0, 5.0) 2 1 4 1.0 3/4 3/4 2/2 1 1 1/4 NaN 1/4 1 <NA> -1 77 F5 5 4 <NA> NaN

Notes and staves

# Count the notes per value of the 'staff' column.
print("Distribution of notes over staves:")
all_notes['staff'].value_counts()
Distribution of notes over staves:
1    230221
2    200617
3      2397
4       984
Name: staff, dtype: Int64
print("Distribution of notes over staves for all pieces with more than two staves\n")
# Go through the notes piece by piece and skip every piece that has no note
# on a staff above 2.
for group, df in all_notes.groupby(level=[0,1]):
    staves = df.staff
    if not (staves > 2).any():
        continue
    print(group)
    print(staves.value_counts().to_dict())
Distribution of notes over staves for all pieces with more than two staves

('grieg_lyric_pieces', 'op43n06')
{2: 769, 3: 422, 1: 180}
('liszt_pelerinage', '161.04_Sonetto_47_del_Petrarca')
{1: 1076, 2: 628, 3: 42, 4: 29}
('liszt_pelerinage', '161.07_Apres_une_lecture_du_Dante')
{1: 6638, 2: 5181, 3: 50}
('liszt_pelerinage', '162.01_Gondoliera')
{3: 1745, 4: 955}
('medtner_tales', 'op34n03')
{1: 1219, 2: 816, 3: 89}
('medtner_tales', 'op35n04')
{1: 1678, 2: 1632, 3: 49}
# Keep only notes on staves above 2 and count them per piece and staff.
beyond_second_staff = all_notes.staff > 2
all_notes[beyond_second_staff].groupby(level=[0,1]).staff.value_counts()
corpus              fname                              staff
grieg_lyric_pieces  op43n06                            3         422
liszt_pelerinage    161.04_Sonetto_47_del_Petrarca     3          42
                                                       4          29
                    161.07_Apres_une_lecture_du_Dante  3          50
                    162.01_Gondoliera                  3        1745
                                                       4         955
medtner_tales       op34n03                            3          89
                    op35n04                            3          49
Name: staff, dtype: int64

Harmony labels

All symbols, independent of the local key (the mode of which changes their semantics).

# Fetch the 'expanded' facet. Note that this reads from `annotated`, not from
# `selected`, so it is unaffected by the choice made above.
all_annotations = annotated.get_facet('expanded')
# Bare expression: in a notebook this displays the first rows.
all_annotations.head()
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname interval
beethoven_piano_sonatas 01-1 [0.0, 9.0) 1 0 0 9.0 0 3/4 2/2 2 1 f.i{ NaN f i NaN i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 <NA> NaN
[9.0, 17.0) 4 3 9 8.0 0 0 2/2 2 1 V65 NaN f i NaN V65 NaN V NaN 65 NaN NaN NaN NaN Mm7 True True (5, 2, -1, 1) () 1 5 <NA> NaN
[17.0, 21.0) 6 5 17 4.0 0 0 2/2 2 1 i NaN f i NaN i NaN i NaN NaN NaN NaN NaN NaN m True True (0, -3, 1) () 0 0 <NA> NaN
[21.0, 25.0) 7 6 21 4.0 0 0 2/2 2 1 #viio6 NaN f i NaN #viio6 NaN #vii o 6 NaN NaN NaN NaN o True True (2, -1, 5) () 5 2 <NA> NaN
[25.0, 27.0) 8 7 25 2.0 0 0 2/2 2 1 i6 NaN f i NaN i6 NaN i NaN 6 NaN NaN NaN NaN m True True (-3, 1, 0) () 0 -3 <NA> NaN
# Rows without a value in 'root' are treated as non-chord labels.
no_chord = all_annotations.root.isna()
n_annotation_rows = all_annotations.shape[0]
n_non_chords = no_chord.sum()
print(f"Concatenated annotation tables contains {n_annotation_rows} rows. {n_non_chords} of them are not chords. Their values are:")
# Which labels do these rows carry, and how often?
non_chord_labels = all_annotations.label[no_chord]
non_chord_labels.value_counts(dropna=False).to_dict()
Concatenated annotation tables contains 57436 rows. 681 of them are not chords. Their values are:
{'{': 640, '}': 32, '|PAC}': 6, '|HC': 2, '|PAC': 1}
# Independent copy of all rows that do have a root.
all_chords = all_annotations[~no_chord].copy()
n_tokens = all_chords.shape[0]
n_types = len(all_chords.chord.unique())
n_documents = len(all_chords.groupby(level=[0,1]))
print(f"Corpus contains {n_tokens} tokens and {n_types} types over {n_documents} documents.")
Corpus contains 56755 tokens and 3125 types over 264 documents.
#from ms3 import write_tsv
#write_tsv(all_annotations[all_annotations.pedalend.notna()], './issues/pedalpoints.tsv', pre_process=False)

Corpus summary

# Build the per-piece summary table from the metadata.
# Always work on a copy: the original copied only in the filtered branch, so
# in the other branch the column assignment below overwrote `length_qb` in
# `all_metadata` itself.
if selected == annotated:
    # Restrict the summary to pieces that carry at least one label.
    summary = all_metadata[all_metadata.label_count > 0].copy()
else:
    summary = all_metadata.copy()
# Recompute each piece's length as the sum of its measures' `act_dur` times 4
# (presumably `act_dur` is expressed in whole notes -- TODO confirm).
# Bracket assignment always targets the column, whereas attribute assignment
# only does so if the column already exists.
summary['length_qb'] = all_measures.groupby(level=[0, 1]).act_dur.sum() * 4.0
# Append the number of note rows per piece as column 'notes'.
summary = pd.concat(
    [
        summary,
        all_notes.groupby(level=[0, 1]).size().rename('notes'),
    ],
    axis=1,
)
# Descriptive statistics per corpus (index level 0), without the statistic
# columns that are entirely NaN.
summary.groupby(level=0).describe().dropna(axis=1, how='all')
last_mc last_mn last_mc_unfolded last_mn_unfolded length_qb_unfolded all_notes_qb n_onsets ... form_label_count label_count composed_start composed_end poet arranger mscVersion translator notes
count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean ... mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count count count mean std min 25% 50% 75% max count count mean std min 25% 50% 75% max
corpus
beethoven_piano_sonatas 64.0 184.703125 99.997409 28.0 111.25 169.0 253.50 543.0 64.0 182.218750 99.937674 28.0 108.75 166.5 253.25 543.0 64.0 238.546875 136.995299 28.0 129.25 210.5 309.25 672.0 64.0 236.078125 136.544813 28.0 124.25 207.5 308.25 664.0 64.0 719.642500 440.937916 84.0 365.500 603.50 958.125 1872.00 64.0 1559.357031 966.510342 232.12 790.720 1365.040 2032.4375 4265.05 64.0 2532.312500 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 64.0 343.156250 171.842048 82.0 234.50 311.5 398.75 869.0 64.0 1801.593750 8.466027 1793.0 1795.0 1798.5 1803.25 1821.0 64.0 1802.343750 8.300351 1795.0 1797.0 1798.5 1804.25 1822.0 0.0 0.0 64.0 3.02 0.000000e+00 3.02 3.02 3.02 3.02 3.02 0.0 64.0 2592.937500 1507.781181 517.0 1486.00 2113.5 3663.00 6817.0
chopin_mazurkas 55.0 89.509091 47.668171 20.0 58.50 75.0 112.00 225.0 55.0 92.527273 55.124054 20.0 56.50 72.0 115.50 247.0 53.0 102.320755 48.882030 12.0 68.00 98.0 125.00 225.0 53.0 101.377358 48.774294 12.0 68.00 96.0 120.00 224.0 53.0 304.533019 146.421218 35.0 204.000 289.00 360.000 673.00 55.0 923.672545 473.585484 208.00 583.625 782.000 1206.5000 2207.00 55.0 1017.563636 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 55.0 165.945455 97.501836 38.0 104.00 139.0 213.50 481.0 55.0 1836.472727 6.160972 1825.0 1831.0 1837.0 1842.00 1848.0 55.0 1836.909091 5.876106 1826.0 1832.0 1837.0 1842.00 1849.0 0.0 0.0 55.0 3.02 1.344547e-15 3.02 3.02 3.02 3.02 3.02 0.0 55.0 1040.018182 525.964634 252.0 680.00 921.0 1353.50 2265.0
debussy_suite_bergamasque 4.0 105.250000 36.270971 72.0 84.75 96.5 117.00 156.0 4.0 105.250000 36.270971 72.0 84.75 96.5 117.00 156.0 4.0 105.250000 36.270971 72.0 84.75 96.5 117.00 156.0 4.0 105.250000 36.270971 72.0 84.75 96.5 117.00 156.0 4.0 404.000000 147.837749 312.0 321.000 340.00 423.000 624.00 4.0 1522.250000 231.693064 1266.00 1414.500 1498.835 1606.5850 1825.33 4.0 1943.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 253.250000 70.035110 150.0 243.00 279.0 289.25 305.0 4.0 1890.000000 0.000000 1890.0 1890.0 1890.0 1890.00 1890.0 4.0 1905.000000 0.000000 1905.0 1905.0 1905.0 1905.00 1905.0 0.0 0.0 4.0 3.02 0.000000e+00 3.02 3.02 3.02 3.02 3.02 0.0 4.0 2052.500000 327.479770 1680.0 1839.75 2061.0 2273.75 2408.0
dvorak_silhouettes 12.0 56.500000 18.128330 15.0 51.50 58.5 63.75 81.0 12.0 56.166667 17.928308 15.0 50.00 58.5 63.75 80.0 12.0 56.500000 18.128330 15.0 51.50 58.5 63.75 81.0 12.0 56.166667 17.928308 15.0 50.00 58.5 63.75 80.0 12.0 154.375000 51.041037 63.5 120.000 150.25 177.750 241.00 12.0 582.626667 210.029409 278.75 418.000 560.375 689.8750 1037.00 12.0 842.083333 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 12.0 128.250000 52.829270 67.0 94.75 108.5 146.00 238.0 12.0 1875.000000 0.000000 1875.0 1875.0 1875.0 1875.00 1875.0 12.0 1879.000000 0.000000 1879.0 1879.0 1879.0 1879.00 1879.0 0.0 0.0 12.0 3.02 0.000000e+00 3.02 3.02 3.02 3.02 3.02 0.0 12.0 887.416667 294.495704 382.0 684.25 877.0 1074.25 1440.0
grieg_lyric_pieces 66.0 82.500000 47.123161 23.0 47.25 72.0 96.75 204.0 66.0 82.030303 47.118499 23.0 47.25 72.0 95.00 204.0 66.0 91.439394 52.872307 23.0 53.25 76.5 112.50 252.0 66.0 90.954545 52.824071 23.0 53.25 76.5 112.50 252.0 66.0 276.261364 160.596176 46.0 174.000 223.50 336.250 748.00 66.0 844.019697 509.227407 135.50 477.250 707.750 1136.7500 2389.75 66.0 945.924242 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 66.0 124.712121 57.222579 21.0 87.00 111.0 156.00 313.0 66.0 1887.393939 10.829985 1864.0 1886.0 1889.0 1896.00 1901.0 66.0 1889.060606 10.033603 1867.0 1886.0 1891.0 1896.00 1901.0 0.0 0.0 66.0 3.02 4.474922e-16 3.02 3.02 3.02 3.02 3.02 0.0 66.0 996.575758 626.270329 273.0 548.75 780.5 1228.25 3691.0
liszt_pelerinage 19.0 138.947368 113.069533 49.0 72.50 97.0 148.00 481.0 19.0 138.157895 112.514820 48.0 72.50 97.0 146.50 479.0 19.0 138.947368 113.069533 49.0 72.50 97.0 148.00 481.0 19.0 138.157895 112.514820 48.0 72.50 97.0 146.50 479.0 19.0 511.013158 363.761729 120.0 277.000 388.00 647.500 1505.25 19.0 1763.276842 1399.737520 289.50 867.085 1169.750 2369.4600 5885.89 19.0 3081.736842 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 19.0 266.736842 196.676226 84.0 135.00 200.0 290.50 716.0 19.0 1846.052632 7.419746 1838.0 1838.0 1848.0 1848.00 1859.0 19.0 1857.052632 2.247806 1855.0 1855.0 1858.0 1858.00 1861.0 0.0 0.0 19.0 3.02 4.562583e-16 3.02 3.02 3.02 3.02 3.02 0.0 19.0 3133.368421 2888.257764 749.0 1486.50 2061.0 2861.00 11869.0
medtner_tales 19.0 130.210526 115.216983 48.0 71.50 81.0 142.50 554.0 19.0 129.684211 115.060879 47.0 71.00 81.0 141.50 553.0 19.0 136.368421 125.640232 48.0 71.50 81.0 163.50 599.0 19.0 135.894737 125.446268 47.0 71.00 81.0 163.00 598.0 19.0 359.368421 235.116924 130.0 189.750 244.50 506.500 971.50 19.0 1186.942632 869.657119 381.63 607.460 754.770 1768.0400 3519.00 19.0 2165.473684 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 19.0 354.210526 214.154560 116.0 204.00 307.0 415.00 1020.0 19.0 1915.263158 5.942709 1904.0 1910.0 1916.0 1918.50 1925.0 19.0 1916.736842 5.857968 1905.0 1912.0 1917.0 1920.50 1925.0 0.0 0.0 19.0 3.02 4.562583e-16 3.02 3.02 3.02 3.02 3.02 0.0 19.0 2259.421053 1394.984003 826.0 1402.00 1917.0 2860.00 6944.0
schumann_kinderszenen 13.0 31.230769 12.008010 17.0 24.00 27.0 34.00 58.0 13.0 30.153846 11.985033 16.0 24.00 25.0 32.00 57.0 13.0 40.230769 12.975322 17.0 32.00 42.0 48.00 64.0 13.0 39.230769 13.000986 17.0 32.00 40.0 48.00 64.0 13.0 93.730769 32.815688 34.0 64.000 96.00 120.000 144.00 13.0 278.820000 119.650650 106.00 173.000 305.500 365.5000 471.00 13.0 385.538462 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 13.0 72.923077 30.148139 44.0 49.00 67.0 84.00 140.0 13.0 1838.000000 0.000000 1838.0 1838.0 1838.0 1838.00 1838.0 13.0 1839.000000 0.000000 1839.0 1839.0 1839.0 1839.00 1839.0 0.0 0.0 13.0 3.02 4.622227e-16 3.02 3.02 3.02 3.02 3.02 0.0 13.0 401.769231 124.614843 244.0 292.00 402.0 466.00 632.0
tchaikovsky_seasons 12.0 104.333333 50.102320 46.0 76.25 89.0 119.50 199.0 12.0 104.166667 49.963320 46.0 76.25 89.0 119.50 198.0 12.0 112.583333 66.422146 46.0 76.25 89.0 122.50 263.0 12.0 112.416667 66.339976 46.0 76.25 89.0 122.50 263.0 12.0 350.375000 183.100336 92.0 249.875 320.50 367.500 789.00 12.0 1136.986667 466.493146 321.75 876.625 1013.045 1332.2075 1994.00 12.0 1514.083333 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 12.0 254.916667 106.769470 119.0 189.50 235.5 286.75 514.0 12.0 1875.000000 0.000000 1875.0 1875.0 1875.0 1875.00 1875.0 12.0 1876.000000 0.000000 1876.0 1876.0 1876.0 1876.00 1876.0 0.0 0.0 12.0 3.02 0.000000e+00 3.02 3.02 3.02 3.02 3.02 0.0 12.0 1562.583333 573.170998 626.0 1113.25 1585.5 1910.00 2523.0

9 rows × 123 columns

# Order the corpora by the mean of their pieces' `composed_end` year
# (cast to int) and assign each corpus one colour of the colour scale.
mean_composition_years = summary.groupby(level=0).composed_end.mean().astype(int).sort_values()
chronological_order = mean_composition_years.index.to_list()
# NOTE: zip() stops at the shorter sequence, so corpora beyond the length of
# the colour scale would get no entry in this mapping.
corpus_colors = dict(zip(chronological_order, corpus_color_scale))
# One row per corpus: its mean year and its number of pieces.
bar_data = pd.concat(
    [
        mean_composition_years.rename('year'),
        summary.groupby(level='corpus').size().rename('pieces'),
    ],
    axis=1,
).reset_index()
fig = px.bar(
    bar_data,
    x='year',
    y='pieces',
    color='corpus',
    color_discrete_map=corpus_colors,
    height=350,
    width=800,
)
fig.update_traces(width=5)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "corpus_sizes.png"), scale=2)
# Display the figure explicitly, like the other figure cells do. The original
# repeated `fig.update_traces(width=5)` here, a no-op whose only visible
# effect was the implicit display of its return value.
fig.show()
# Human-readable display name for every corpus directory name.
corpus_names = {
    'beethoven_piano_sonatas': 'Beethoven Sonatas',
    'chopin_mazurkas': 'Chopin Mazurkas',
    'debussy_suite_bergamasque': 'Debussy Suite',
    'dvorak_silhouettes': "Dvořák Silhouettes",
    'grieg_lyric_pieces': "Grieg Lyric Pieces",
    'liszt_pelerinage': "Liszt Années",
    'medtner_tales': "Medtner Tales",
    'schumann_kinderszenen': "Schumann Kinderszenen",
    'tchaikovsky_seasons': "Tchaikovsky Seasons",
}
# Display names in chronological corpus order, and the colour mapping re-keyed
# by display name.
chronological_corpus_names = [corpus_names[name] for name in chronological_order]
corpus_name_colors = {
    corpus_names[name]: colour
    for name, colour in corpus_colors.items()
}
# Add the display name of each row's corpus (index level 0) to both tables.
all_annotations['corpus_name'] = (
    all_annotations.index.get_level_values(0).map(corpus_names)
)
all_chords['corpus_name'] = (
    all_chords.index.get_level_values(0).map(corpus_names)
)
# Count the pieces for every (composed_end, corpus) combination and plot the
# counts as bars coloured by corpus.
bar_data = (
    summary.reset_index()
    .groupby(['composed_end', 'corpus'])
    .size()
    .rename('counts')
    .reset_index()
)
px.bar(
    bar_data,
    x='composed_end',
    y='counts',
    color='corpus',
    color_discrete_map=corpus_colors,
)
# Flat copy of the summary (index levels become columns) in which the corpus
# column holds the display names instead of the directory names.
hist_data = summary.reset_index()
hist_data['corpus'] = hist_data['corpus'].map(corpus_names)
hist_data.head()
corpus fname TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers composed_start composed_end composed_source composer workTitle movementNumber movementTitle workNumber poet lyricist arranger copyright creationDate mscVersion platform source translator title_text subtitle_text lyricist_text composer_text musescore ms3_version subdirectory rel_path has_drumset ambitus imslp musicbrainz viaf wikidata originalFormat staff_1_ambitus staff_1_instrument staff_2_ambitus staff_2_instrument score_integrity imslp.1 key mode typesetter text pdf score integrity comments staff_3_ambitus staff_3_instrument PDF staff_4_ambitus staff_4_instrument notes
0 Beethoven Sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308.0 304.0 1216.0 NaN 1476.00 1679 985 0 0 241 f 2.3.0 Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) AN 1793 1795 OxfordMusicOnline Ludwig van Beethoven Sonata no. 1 1 Allegro op.2/1 NaN NaN NaN NaN 2019-03-05 3.02 Apple Macintosh NaN NaN Sonata no. 1 1. Allegro NaN Ludwig van Beethoven 3.6.2 1.1.1 MS3 MS3/01-1.mscx False 32-89 (Ab1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/a78520e0-0211-3b5... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 xml 51-89 (Eb3-F6) piano 32-73 (Ab1-Db5) piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1693
1 Beethoven Sonatas 01-2 1: 3/4 1: -1 62 61 183.0 124.0 122.0 366.0 NaN 526.17 1286 822 0 0 200 F 2.3.0 Lars & Ya-Chuan Adrian Nagel, Victor Zheng 1793 1795 OxfordMusicOnline Ludwig van Beethoven Sonata no. 1 2 Adagio op.2/1 NaN NaN NaN NaN 2019-03-05 3.02 Apple Macintosh NaN NaN Sonata no. 1 2. Adagio NaN Ludwig van Beethoven 3.6.2 1.1.1 MS3 MS3/01-2.mscx False 31-89 (G1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/bea1b893-2732-33a... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 xml 43-89 (G2-F6) piano 31-77 (G1-F5) piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1302
2 Beethoven Sonatas 01-3 1: 3/4 1: -4, 43: -1 77 73 219.0 196.0 186.0 558.0 NaN 565.50 636 310 0 0 132 f 2.3.0 Daniel Grote (2.2.0), Adrian Nagel (2.3.0) Adrian Nagel, Victor Zheng 1793 1795 OxfordMusicOnline Ludwig van Beethoven Sonata no. 1 3 Menuetto and Trio: Allegretto op.2/1 NaN NaN NaN NaN 2017-11-26 3.02 Microsoft Windows http://musescore.com/score/55335 NaN Sonata no. 1 3. Menuetto and Trio: Allegretto NaN Ludwig van Beethoven 3.6.2 1.1.1 MS3 MS3/01-3.mscx False 31-85 (G1-Db6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/2bd7e1ea-c696-3be... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 mxl 53-85 (F3-Db6) Piano 31-74 (G1-D5) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 639
3 Beethoven Sonatas 01-4 1: 2/2 1: -4 199 196 784.0 392.0 390.0 1560.0 (((57, 58), (59, 60, 61)),) 2326.83 3151 1789 0 0 355 f 2.3.0 Daniel Grote (2.2.0), Adrian Nagel (2.3.0) Adrian Nagel, Victor Zheng 1793 1795 OxfordMusicOnline Ludwig van Beethoven Sonata no. 1 4 Prestissimo op.2/1 NaN NaN NaN NaN 2017-11-26 3.02 Microsoft Windows NaN NaN Sonata no. 1 4. Prestissimo NaN Ludwig van Beethoven 3.6.2 1.1.1 MS3 MS3/01-4.mscx False 31-89 (G1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/b755e900-804a-312... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 mxl 50-89 (D3-F6) Piano 31-75 (G1-Eb5) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 3130
4 Beethoven Sonatas 02-1 1: 2/4 1: 3, 127: 0, 230: 3 342 336 672.0 672.0 664.0 1336.0 (((115, 116, 117, 118), (119, 120, 121, 122, 1... 1695.75 3034 1705 1 0 479 A 2.3.0 Lydia Carlisi (2.2.0), Victor Zheng (2.3.0) AN, VZ 1794 1795 OxfordMusicOnline Ludwig van Beethoven Sonata no. 2 1 Allegro vivace op.2/2 NaN NaN NaN NaN 2019-03-05 3.02 Apple Macintosh NaN NaN Sonata no. 2 1. Allegro vivace NaN Ludwig van Beethoven 3.6.2 1.1.1 MS3 MS3/02-1.mscx False 31-89 (G1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/c001a2eb-9493-327... https://viaf.org/viaf/179221580 https://www.wikidata.org/wiki/Q145699 xml 39-89 (D#2-F6) piano 31-76 (G1-E5) piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 3093
# Histogram of the pieces' `composed_end` years, stacked by corpus.
fig = px.histogram(
    hist_data,
    x='composed_end',
    color='corpus',
    labels={'composed_end': 'decade', 'count': 'pieces'},
    color_discrete_map=corpus_name_colors,
    width=1000,
    height=400,
)
# Bins span 10 units of the x axis, i.e. ten years each.
fig.update_traces(xbins={'size': 10})
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "corpus_size_histogram.png"), scale=2)
fig.show()
# List all columns of the summary table.
summary.columns
Index(['TimeSig', 'KeySig', 'last_mc', 'last_mn', 'length_qb',
       'last_mc_unfolded', 'last_mn_unfolded', 'length_qb_unfolded',
       'volta_mcs', 'all_notes_qb', 'n_onsets', 'n_onset_positions',
       'guitar_chord_count', 'form_label_count', 'label_count',
       'annotated_key', 'harmony_version', 'annotators', 'reviewers',
       'composed_start', 'composed_end', 'composed_source', 'composer',
       'workTitle', 'movementNumber', 'movementTitle', 'workNumber', 'poet',
       'lyricist', 'arranger', 'copyright', 'creationDate', 'mscVersion',
       'platform', 'source', 'translator', 'title_text', 'subtitle_text',
       'lyricist_text', 'composer_text', 'musescore', 'ms3_version',
       'subdirectory', 'rel_path', 'has_drumset', 'ambitus', 'imslp',
       'musicbrainz', 'viaf', 'wikidata', 'originalFormat', 'staff_1_ambitus',
       'staff_1_instrument', 'staff_2_ambitus', 'staff_2_instrument',
       'score_integrity', 'imslp.1', 'key', 'mode', 'typesetter', 'text',
       'pdf', 'score integrity', 'comments', 'staff_3_ambitus',
       'staff_3_instrument', 'PDF', 'staff_4_ambitus', 'staff_4_instrument',
       'notes'],
      dtype='object')
# Aggregate the per-piece summary by corpus (index level 0).
corpus_metadata = summary.groupby(level=0)
n_pieces = corpus_metadata.size().rename('pieces')
# Corpus-wise totals; the dict order fixes the column order of `absolute`.
absolute_numbers = {
    'measures': corpus_metadata.last_mn.sum(),
    'length': corpus_metadata.length_qb.sum(),
    'notes': corpus_metadata.notes.sum(),
    'labels': corpus_metadata.label_count.sum(),
}
absolute = pd.DataFrame.from_dict(absolute_numbers)
# The same totals divided by the number of pieces of each corpus.
relative = absolute.div(n_pieces, axis=0)
# Three column groups side by side: totals (including the piece count),
# per-piece averages, and notes/labels per measure.
complete_summary = pd.concat(
    [
        pd.concat([n_pieces, absolute], axis=1),
        relative,
        absolute[['notes', 'labels']].div(absolute['measures'], axis=0),
    ],
    axis=1,
    keys=['absolute', 'per piece', 'per measure'],
)
complete_summary = complete_summary.apply(pd.to_numeric).round(2)
# Replace the corpus directory names in the index by their display names.
complete_summary.index = complete_summary.index.map(corpus_names)
complete_summary.to_csv('romantic_summary.tsv', sep='\t')
complete_summary
absolute per piece per measure
pieces measures length notes labels measures length notes labels notes labels
corpus
Beethoven Sonatas 64 11662 35663.38 165948 21962 182.22 557.24 2592.94 343.16 14.23 1.88
Chopin Mazurkas 55 5089 14605.25 57201 9127 92.53 265.55 1040.02 165.95 11.24 1.79
Debussy Suite 4 421 1616.00 8210 1013 105.25 404.00 2052.50 253.25 19.50 2.41
Dvořák Silhouettes 12 674 1852.50 10649 1539 56.17 154.38 887.42 128.25 15.80 2.28
Grieg Lyric Pieces 66 5414 16485.25 65774 8231 82.03 249.78 996.58 124.71 12.15 1.52
Liszt Années 19 2625 9709.25 59534 5068 138.16 511.01 3133.37 266.74 22.68 1.93
Medtner Tales 19 2464 6598.00 42929 6730 129.68 347.26 2259.42 354.21 17.42 2.73
Schumann Kinderszenen 13 392 934.00 5223 948 30.15 71.85 401.77 72.92 13.32 2.42
Tchaikovsky Seasons 12 1250 3919.50 18751 3059 104.17 326.62 1562.58 254.92 15.00 2.45
# One-row frame labelled 'sum' that holds the total of every column.
sum_row = pd.DataFrame(complete_summary.sum(), columns=['sum']).T
# Totals only make sense for the 'absolute' column group; blank out the
# per-piece and per-measure ratios. Assigning column by column replaces each
# column outright and therefore avoids the DeprecationWarning raised by
# whole-column `.iloc[:, i] = ...` / `.loc[:, cols] = ...` assignments that
# change the dtype.
ratio_columns = [col for col in sum_row.columns if col[0] != 'absolute']
for col in ratio_columns:
    sum_row[col] = ''
summary_with_sum = pd.concat([complete_summary, sum_row])
# Concatenating with the float-valued sum row turns the counts into floats;
# cast the note and label counts back to integers. Plain column assignment
# guarantees that the new dtype is actually adopted.
for col in [('absolute', 'notes'), ('absolute', 'labels')]:
    summary_with_sum[col] = summary_with_sum[col].astype(int)
summary_with_sum
/tmp/ipykernel_145367/1013929657.py:4: DeprecationWarning:

In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`
absolute per piece per measure
pieces measures length notes labels measures length notes labels notes labels
Beethoven Sonatas 64.0 11662.0 35663.38 165948 21962 182.22 557.24 2592.94 343.16 14.23 1.88
Chopin Mazurkas 55.0 5089.0 14605.25 57201 9127 92.53 265.55 1040.02 165.95 11.24 1.79
Debussy Suite 4.0 421.0 1616.00 8210 1013 105.25 404.0 2052.5 253.25 19.5 2.41
Dvořák Silhouettes 12.0 674.0 1852.50 10649 1539 56.17 154.38 887.42 128.25 15.8 2.28
Grieg Lyric Pieces 66.0 5414.0 16485.25 65774 8231 82.03 249.78 996.58 124.71 12.15 1.52
Liszt Années 19.0 2625.0 9709.25 59534 5068 138.16 511.01 3133.37 266.74 22.68 1.93
Medtner Tales 19.0 2464.0 6598.00 42929 6730 129.68 347.26 2259.42 354.21 17.42 2.73
Schumann Kinderszenen 13.0 392.0 934.00 5223 948 30.15 71.85 401.77 72.92 13.32 2.42
Tchaikovsky Seasons 12.0 1250.0 3919.50 18751 3059 104.17 326.62 1562.58 254.92 15.0 2.45
sum 264.0 29991.0 91383.13 434219 57677
# Show the pieces for which no ambitus is recorded.
summary.loc[summary['ambitus'].isna()]
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers composed_start composed_end composed_source composer workTitle movementNumber movementTitle workNumber poet lyricist arranger copyright creationDate mscVersion platform source translator title_text subtitle_text lyricist_text composer_text musescore ms3_version subdirectory rel_path has_drumset ambitus imslp musicbrainz viaf wikidata originalFormat staff_1_ambitus staff_1_instrument staff_2_ambitus staff_2_instrument score_integrity imslp.1 key mode typesetter text pdf score integrity comments staff_3_ambitus staff_3_instrument PDF staff_4_ambitus staff_4_instrument notes
corpus fname
# Pull the two leading integers out of every ambitus string
# (e.g. "32-89 (Ab1-F6)" yields 32 and 89).
summary['ambitus'].str.extract(r"^(\d+)-(\d+)")
0 1
corpus fname
beethoven_piano_sonatas 01-1 32 89
01-2 31 89
01-3 31 85
01-4 31 89
02-1 31 89
... ... ... ...
tchaikovsky_seasons op37a08 35 91
op37a09 36 91
op37a10 37 82
op37a11 38 92
op37a12 39 92

264 rows × 2 columns

# Table with the lowest and highest value parsed from each piece's ambitus
# string, plus the distance between the two.
ambitus = (
    summary['ambitus']
    .str.extract(r"^(\d+)-(\d+)")
    .astype(int)
)
ambitus.columns = ['low', 'high']
ambitus['range'] = ambitus['high'] - ambitus['low']
ambitus.head()
low high range
corpus fname
beethoven_piano_sonatas 01-1 32 89 57
01-2 31 89 58
01-3 31 85 54
01-4 31 89 58
02-1 31 89 58
# Highest upper bound reached in each corpus.
ambitus['high'].groupby(level=0).max()
corpus
beethoven_piano_sonatas      101
chopin_mazurkas               98
debussy_suite_bergamasque     97
dvorak_silhouettes            97
grieg_lyric_pieces           102
liszt_pelerinage             102
medtner_tales                100
schumann_kinderszenen         91
tchaikovsky_seasons           93
Name: high, dtype: int64
# Lowest lower bound reached in each corpus.
ambitus['low'].groupby(level=0).min()
corpus
beethoven_piano_sonatas      24
chopin_mazurkas              25
debussy_suite_bergamasque    24
dvorak_silhouettes           23
grieg_lyric_pieces           21
liszt_pelerinage             21
medtner_tales                21
schumann_kinderszenen        25
tchaikovsky_seasons          29
Name: low, dtype: int64
# Widest single-piece range in each corpus.
ambitus['range'].groupby(level=0).max()
corpus
beethoven_piano_sonatas      72
chopin_mazurkas              68
debussy_suite_bergamasque    70
dvorak_silhouettes           69
grieg_lyric_pieces           74
liszt_pelerinage             81
medtner_tales                78
schumann_kinderszenen        60
tchaikovsky_seasons          64
Name: range, dtype: int64
# Span covered by each corpus as a whole: its highest upper bound minus its
# lowest lower bound.
ambitus['high'].groupby(level=0).max() - ambitus['low'].groupby(level=0).min()
corpus
beethoven_piano_sonatas      77
chopin_mazurkas              73
debussy_suite_bergamasque    73
dvorak_silhouettes           74
grieg_lyric_pieces           81
liszt_pelerinage             81
medtner_tales                79
schumann_kinderszenen        66
tchaikovsky_seasons          64
dtype: int64

Phrases

# Run dimcat's PhraseSlicer over the selected dataset and retrieve the slice
# info table (one row per phrase slice, see the `phrase_slice` index level in
# the output).
phrase_segmented = dc.PhraseSlicer().process_data(selected)
phrases = phrase_segmented.get_slice_info()
print(f"Overall number of phrases is {len(phrases.index)}")
# Preview the first 20 phrases.
phrases.head(20)
Overall number of phrases is 3596
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname phrase_slice
beethoven_piano_sonatas 01-1 [0.0, 32.0) 1 0 0 32.0 0 3/4 2/2 2 1 f.i{ NaN f i NaN i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 NaN NaN
[32.0, 64.0) 9 8 32 32.0 3/4 3/4 2/2 2 1 v{ NaN f i NaN v NaN v NaN NaN NaN NaN NaN { m True True (1, -2, 2) () 1 1 NaN NaN
[64.0, 72.0) 17 16 64 8.0 3/4 3/4 2/2 2 1 I6{ NaN f III NaN I6 NaN I NaN 6 NaN NaN NaN { M True False (4, 1, 0) () 0 4 NaN NaN
[72.0, 77.0) 19 18 72 5.0 3/4 3/4 2/2 2 1 I6{ NaN f III NaN I6 NaN I NaN 6 NaN NaN NaN { M True False (4, 1, 0) () 0 4 NaN NaN
[77.0, 163.0) 21 20 77 86.0 0 0 2/2 2 1 V[V|HC}{ NaN f III V V NaN V NaN NaN NaN NaN HC }{ M True False (1, 5, 2) () 1 1 NaN NaN
[163.0, 192.0) 42 41 163 29.0 1/2 1/2 2/2 2 1 viio7/V{ NaN f III NaN viio7/V NaN vii o 7 NaN V NaN { o7 True False (6, 3, 0, -3) () 6 6 NaN NaN
[192.0, 217.0) 50 48 192 25.0 0 3/4 2/2 2 1 { NaN f III NaN NaN NaN NaN NaN NaN NaN NaN NaN { NaN True False () () <NA> <NA> NaN NaN
[217.0, 249.0) 57 55 217 32.0 0 0 2/2 2 1 V[V|HC}{ NaN f iv V V NaN V NaN NaN NaN NaN HC }{ M True True (1, 5, 2) () 1 1 NaN NaN
[249.0, 321.0) 65 63 249 72.0 0 0 2/2 2 1 V[V|HC}{ NaN f v V V NaN V NaN NaN NaN NaN HC }{ M True True (1, 5, 2) () 1 1 NaN NaN
[321.0, 377.0) 83 81 321 56.0 0 0 2/2 2 1 V[V|HC}{ NaN f i V V NaN V NaN NaN NaN NaN HC }{ M True True (1, 5, 2) () 1 1 NaN NaN
[377.0, 401.0) 97 95 377 24.0 0 0 2/2 2 1 bII6(4)]{ NaN f i V bII6(4) NaN bII NaN 6 4 NaN NaN { M True True (1, -4, -5) () -5 1 NaN NaN
[401.0, 432.0) 103 101 401 31.0 0 0 2/2 2 1 i|IAC}{ NaN f i NaN i NaN i NaN NaN NaN NaN IAC }{ m True True (0, -3, 1) () 0 0 NaN NaN
[432.0, 473.0) 110 108 432 41.0 3/4 3/4 2/2 2 1 i{ NaN f i NaN i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 NaN NaN
[473.0, 559.0) 121 119 473 86.0 0 0 2/2 2 1 V[V|HC{ NaN f i V V NaN V NaN NaN NaN NaN HC { M True True (1, 5, 2) () 1 1 NaN NaN
[559.0, 608.0) 142 140 559 49.0 1/2 1/2 2/2 2 1 Ger6{ NaN f i NaN Ger6 Ger vii o 65 b3 V NaN { Ger True True (-4, 0, -3, 6) () 6 -4 NaN NaN
01-2 [0.0, 11.25) 1 0 0 11.25 0 1/2 3/4 2 1 F.V{ NaN F I NaN V NaN V NaN NaN NaN NaN NaN { M False False (1, 5, 2) () 1 1 NaN NaN
[11.25, 24.0) 5 4 45/4 12.75 5/16 5/16 3/4 2 1 { NaN F I NaN NaN NaN NaN NaN NaN NaN NaN NaN { NaN False False () () <NA> <NA> NaN NaN
[24.0, 35.25) 9 8 24 11.25 1/2 1/2 3/4 2 1 { NaN F I NaN NaN NaN NaN NaN NaN NaN NaN NaN { NaN False False () () <NA> <NA> NaN NaN
[35.25, 48.0) 13 12 141/4 12.75 5/16 5/16 3/4 2 1 { NaN F I NaN NaN NaN NaN NaN NaN NaN NaN NaN { NaN False False () () <NA> <NA> NaN NaN
[48.0, 60.0) 17 16 48 12.0 1/2 1/2 3/4 2 1 vi.V{ NaN F vi NaN V NaN V NaN NaN NaN NaN NaN { M False True (1, 5, 2) () 1 1 NaN NaN
# 'expanded' facet of the phrase-sliced dataset: the harmony-label rows,
# indexed by the phrase slice they belong to.
phrase_segments = phrase_segmented.get_facet('expanded')
# Write the table as tab-separated values; the path is relative, so the file
# ends up in the current working directory.
phrase_segments.to_csv('romantic_phrase_segments.tsv.zip', sep='\t')
phrase_segments
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname phrase_slice
beethoven_piano_sonatas 01-1 [0.0, 32.0) [0.0, 0.0) 1 0 0 0.0 0 3/4 2/2 2 1 f.i{ NaN f i NaN <NA> NaN <NA> <NA> <NA> NaN NaN NaN { <NA> True True <NA> <NA> <NA> <NA> NaN NaN
[0.0, 9.0) 1 0 0 9.0 0 3/4 2/2 2 1 f.i{ NaN f i NaN i NaN i NaN NaN NaN NaN NaN <NA> m True True (0, -3, 1) () 0 0 NaN NaN
[9.0, 17.0) 4 3 9 8.0 0 0 2/2 2 1 V65 NaN f i NaN V65 NaN V NaN 65 NaN NaN NaN NaN Mm7 True True (5, 2, -1, 1) () 1 5 NaN NaN
[17.0, 21.0) 6 5 17 4.0 0 0 2/2 2 1 i NaN f i NaN i NaN i NaN NaN NaN NaN NaN NaN m True True (0, -3, 1) () 0 0 NaN NaN
[21.0, 25.0) 7 6 21 4.0 0 0 2/2 2 1 #viio6 NaN f i NaN #viio6 NaN #vii o 6 NaN NaN NaN NaN o True True (2, -1, 5) () 5 2 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
tchaikovsky_seasons op37a12 [493.0, 528.0) [495.0, 499.0) 166 166 495 4.0 0 0 3/4 2 1 V(64) NaN Ab I NaN V(64) NaN V NaN NaN 64 NaN NaN NaN M False False (1, 0, 4) () 1 1 NaN NaN
[499.0, 501.0) 167 167 499 2.0 1/4 1/4 3/4 2 1 Ger7 NaN Ab I NaN Ger7 Ger vii o 7 b3 V NaN NaN Ger False False (6, -4, 0, -3) () 6 6 NaN NaN
[501.0, 504.0) 168 168 501 3.0 0 0 3/4 2 1 V(64) NaN Ab I NaN V(64) NaN V NaN NaN 64 NaN NaN NaN M False False (1, 0, 4) () 1 1 NaN NaN
[504.0, 504.0) 169 169 504 0.0 0 0 3/4 2 1 I|IAC} NaN Ab I NaN <NA> <NA> <NA> <NA> <NA> <NA> <NA> IAC } <NA> False False <NA> <NA> <NA> <NA> NaN NaN
[504.0, 528.0) 169 169 504 24.0 0 0 3/4 2 1 I|IAC} NaN Ab I NaN I NaN I NaN NaN NaN NaN <NA> <NA> M False False (0, 4, 1) () 0 0 NaN NaN

65082 rows × 32 columns

# Phrases lasting longer than 50 quarter beats.
phrases.loc[phrases['duration_qb'] > 50]
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname phrase_slice
beethoven_piano_sonatas 01-1 [77.0, 163.0) 21 20 77 86.0 0 0 2/2 2 1 V[V|HC}{ NaN f III V V NaN V NaN NaN NaN NaN HC }{ M True False (1, 5, 2) () 1 1 NaN NaN
[249.0, 321.0) 65 63 249 72.0 0 0 2/2 2 1 V[V|HC}{ NaN f v V V NaN V NaN NaN NaN NaN HC }{ M True True (1, 5, 2) () 1 1 NaN NaN
[321.0, 377.0) 83 81 321 56.0 0 0 2/2 2 1 V[V|HC}{ NaN f i V V NaN V NaN NaN NaN NaN HC }{ M True True (1, 5, 2) () 1 1 NaN NaN
[473.0, 559.0) 121 119 473 86.0 0 0 2/2 2 1 V[V|HC{ NaN f i V V NaN V NaN NaN NaN NaN HC { M True True (1, 5, 2) () 1 1 NaN NaN
02-1 [182.5, 242.0) 93 92 365/2 59.5 0 0 2/4 2 1 I|PAC}{ NaN A V NaN I NaN I NaN NaN NaN NaN PAC }{ M False False (0, 4, 1) () 0 0 <NA> NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
medtner_tales op48n02 [268.0, 319.0) 91 90 268 51.0 0 0 3/4 2 1 i{ NaN g v NaN i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 NaN NaN
[493.0, 559.0) 166 165 493 66.0 0 0 3/4 2 1 i[i}{ NaN g i i i NaN i NaN NaN NaN NaN NaN }{ m True True (0, -3, 1) () 0 0 NaN NaN
tchaikovsky_seasons op37a02 [284.0, 338.0) 143 143 284 54.0 0 0 2/4 2 1 bVII.V65{ NaN D bVII NaN V65 NaN V NaN 65 NaN NaN NaN { Mm7 False False (5, 2, -1, 1) () 1 5 NaN NaN
op37a05 [222.0, 274.5) 77 77 222 52.5 0 0 9/8 2 1 bIII64{ NaN G I NaN bIII64 NaN bIII NaN 64 NaN NaN NaN { M False False (-2, -3, 1) () -3 -2 NaN NaN
op37a07 [171.0, 224.0) 43 43 171 53.0 3/4 3/4 4/4 2 1 V7(9){ NaN Eb I NaN V7(9) NaN V NaN 7 9 NaN NaN { Mm7 False False (1, 5, 2, -1) (3,) 1 1 NaN NaN

291 rows × 32 columns

# Collect the distinct time signatures occurring within each phrase
# (index levels 0-2, i.e. corpus, fname and phrase_slice).
phrase2timesigs = phrase_segments.groupby(level=[0, 1, 2]).timesig.unique()
n_timesignatures_per_phrase = phrase2timesigs.map(len)
# For phrases with exactly one time signature, unwrap it from its array.
uniform_timesigs = phrase2timesigs[n_timesignatures_per_phrase == 1].map(lambda sigs: sigs[0])
more_than_one = n_timesignatures_per_phrase > 1
print(f"Filtered out the {more_than_one.sum()} phrases incorporating more than one time signature.")
# How many phrases contain 1, 2, 3, ... different time signatures.
n_timesigs = n_timesignatures_per_phrase.value_counts()
# Name index and values explicitly instead of renaming the columns that
# reset_index() happens to produce: those default names depend on how the
# pandas version names the result of value_counts(), so the original rename
# could silently stop matching.
display(n_timesigs.rename_axis('#time signatures').reset_index(name='#phrases'))
uniform_timesig_phrases = phrases.loc[uniform_timesigs.index]
# E.g. '3/4' -> Fraction(3, 4) -> 3 quarter beats per measure.
timesig_in_quarterbeats = uniform_timesigs.map(Fraction) * 4
# Phrase duration expressed in measures of the phrase's time signature.
exact_measure_lengths = uniform_timesig_phrases.duration_qb / timesig_in_quarterbeats
# NOTE: `uniform_timesigs` is rebound here; from now on it is the phrase
# table extended by the 'duration_measures' column.
uniform_timesigs = pd.concat(
    [exact_measure_lengths.rename('duration_measures'), uniform_timesig_phrases],
    axis=1,
)
fig = px.histogram(
    uniform_timesigs,
    x='duration_measures',
    log_y=True,
    labels=dict(duration_measures='phrase length bin in number of measures'),
    color_discrete_sequence=corpus_color_scale,
    height=400,
    width=1000,
)
# One histogram bin per measure of phrase length.
fig.update_traces(xbins=dict(size=1))
fig.update_layout(**STD_LAYOUT)
fig.update_xaxes(dtick=4, gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "phrase_lengths.png"), scale=2)
fig.show()
Filtered out the 53 phrases incorporating more than one time signature.
#time signatures #phrases
0 1 3543
1 2 52
2 3 1
# Phrases spanning more than 80 measures.
uniform_timesigs.loc[uniform_timesigs['duration_measures'] > 80]
duration_measures mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname phrase_slice

Keys

from ms3 import roman_numeral2fifths, transform, resolve_all_relative_numerals, replace_boolean_mode_by_strings
# Slice the selected data into local-key segments and fetch one row per segment.
keys_segmented = dc.LocalKeySlicer().process_data(selected)
keys = keys_segmented.get_slice_info()
# NOTE: the table is exported before the derived 'localkey_fifths' column is
# added further down, so the file does not contain that column.
keys.to_csv('romantic_keys.tsv.zip', sep='\t')
print(f"Overall number of key segments is {len(keys.index)}")
# Convert each local key (a Roman numeral relative to the global key) to fifths.
keys["localkey_fifths"] = transform(
    keys,
    roman_numeral2fifths,
    ['localkey', 'globalkey_is_minor'],
)
keys.head(20)
Overall number of key segments is 1477
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend localkey_fifths
corpus fname localkey_slice
beethoven_piano_sonatas 01-1 [0.0, 41.0) 1 0 0 41.0 0 3/4 2/2 2 1 f.i{ NaN f i NaN i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 NaN NaN 0
[41.0, 209.0) 12 11 41 168.0 0 0 2/2 2 1 III.IVM2 ii7(2) f III NaN IVM2 NaN IV M 2 NaN NaN NaN NaN MM7 True False (4, -1, 3, 0) () -1 4 NaN NaN -3
[209.0, 247.0) 55 53 209 38.0 0 0 2/2 2 1 iv.viio65/V NaN f iv NaN viio65/V NaN vii o 65 NaN V NaN NaN o7 True True (3, 0, -3, 6) () 6 3 NaN NaN -1
[247.0, 276.0) 64 62 247 29.0 1/2 1/2 2/2 2 1 v.It6 NaN f v NaN It6 It vii o 6 b3 V NaN NaN It True True (-4, 0, 6) () 6 -4 NaN NaN 1
[276.0, 284.0) 71 69 276 8.0 3/4 3/4 2/2 2 1 iv.iio64 NaN f iv NaN iio64 NaN ii o 64 NaN NaN NaN NaN o True True (-4, 2, -1) () 2 -4 NaN NaN -1
[284.0, 307.0) 73 71 284 23.0 3/4 3/4 2/2 2 1 III.iio64 NaN f III NaN iio64 NaN ii o 64 NaN NaN NaN NaN o True False (-4, 2, -1) () 2 -4 NaN NaN -3
[307.0, 608.0) 79 77 307 301.0 1/2 1/2 2/2 2 1 i.V NaN f i NaN V NaN V NaN NaN NaN NaN NaN NaN M True True (1, 5, 2) () 1 1 NaN NaN 0
01-2 [0.0, 48.0) 1 0 0 48.0 0 1/2 3/4 2 1 F.V{ NaN F I NaN V NaN V NaN NaN NaN NaN NaN { M False False (1, 5, 2) () 1 1 NaN NaN 0
[48.0, 61.0) 17 16 48 13.0 1/2 1/2 3/4 2 1 vi.V{ NaN F vi NaN V NaN V NaN NaN NaN NaN NaN { M False True (1, 5, 2) () 1 1 NaN NaN 3
[61.0, 91.5) 22 21 61 30.5 0 0 3/4 2 1 V.V7 NaN F V NaN V7 NaN V NaN 7 NaN NaN NaN NaN Mm7 False False (1, 5, 2, -1) () 1 1 NaN NaN 1
[91.5, 183.0) 32 31 183/2 91.5 1/8 1/8 3/4 2 1 I.V7{ NaN F I NaN V7 NaN V NaN 7 NaN NaN NaN { Mm7 False False (1, 5, 2, -1) () 1 1 NaN NaN 0
01-3 [0.0, 12.0) 1 0 0 12.0 0 1/2 3/4 2 1 f.i[i{ NaN f i i i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 NaN NaN 0
[12.0, 49.0) 5 4 12 37.0 1/2 1/2 3/4 2 1 III.I[I{ NaN f III I I NaN I NaN NaN NaN NaN NaN { M True False (0, 4, 1) () 0 0 NaN NaN -3
[49.0, 76.0) 19 17 49 27.0 0 0 3/4 2 1 iv.#viio7 NaN f iv NaN #viio7 NaN #vii o 7 NaN NaN NaN NaN o7 True True (5, 2, -1, -4) () 5 5 NaN NaN -1
[76.0, 120.0) 28 26 76 44.0 0 0 3/4 2 1 i.#viio NaN f i NaN #viio NaN #vii o NaN NaN NaN NaN NaN o True True (5, 2, -1) () 5 5 NaN NaN 0
[120.0, 138.0) 43 40 120 18.0 0 1/2 3/4 2 1 I.I{ NaN f I NaN I NaN I NaN NaN NaN NaN NaN { M True False (0, 4, 1) () 0 0 NaN NaN 0
[138.0, 157.0) 49 46 138 19.0 1/2 1/2 3/4 2 1 V.V65 NaN f V NaN V65 NaN V NaN 65 NaN NaN NaN NaN Mm7 True False (5, 2, -1, 1) () 1 5 NaN NaN 1
[157.0, 219.0) 57 53 157 62.0 0 0 3/4 2 1 I.V7 NaN f I V V7 NaN V NaN 7 NaN NaN NaN NaN Mm7 True False (1, 5, 2, -1) () 1 1 NaN NaN 0
01-4 [0.0, 47.0) 1 0 0 47.0 0 0 2/2 2 1 f.i{ NaN f i NaN i NaN i NaN NaN NaN NaN NaN { m True True (0, -3, 1) () 0 0 <NA> NaN 0
[47.0, 224.0) 13 12 47 177.0 3/4 3/4 2/2 2 1 v.V65/V NaN f v NaN V65/V NaN V NaN 65 NaN V NaN NaN Mm7 True True (6, 3, 0, 2) () 2 6 <NA> NaN 1
# Total duration (in quarter notes) covered by all local-key segments.
keys['duration_qb'].sum()
91369.25
# Total duration (in quarter notes) covered by all phrases, for comparison.
phrases['duration_qb'].sum()
91328.25
# Summed duration of every local key label, separately for major and minor
# global keys, longest first.
key_durations = (
    keys
    .groupby(['globalkey_is_minor', 'localkey'])['duration_qb']
    .sum()
    .sort_values(ascending=False)
)
print(f"{len(key_durations)} keys overall including hierarchical such as 'III/v'.")
87 keys overall including hierarchical such as 'III/v'.
# Same aggregation as before, after resolving the relative ("hierarchical")
# numerals via ms3.
keys_resolved = resolve_all_relative_numerals(keys)
key_resolved_durations = (
    keys_resolved
    .groupby(['globalkey_is_minor', 'localkey'])['duration_qb']
    .sum()
    .sort_values(ascending=False)
)
print(f"{len(key_resolved_durations)} keys overall after resolving hierarchical ones.")
key_resolved_durations
62 keys overall after resolving hierarchical ones.
globalkey_is_minor  localkey
False               I           36743.208333
True                i              24815.625
False               V                 3491.5
True                I            3421.666667
                    III               3333.0
                                    ...
False               bI                  22.0
                    bvii                12.0
                    #I                  11.0
                    bv                   8.0
                    bii                  4.0
Name: duration_qb, Length: 62, dtype: object
# One pie per global mode, showing the share of each resolved local key by duration.
pie_data = replace_boolean_mode_by_strings(key_resolved_durations.reset_index())
px.pie(
    pie_data,
    names='localkey',
    values='duration_qb',
    facet_col='globalkey_mode',
    height=700,
)
# Summed duration per (line-of-fifths position, mode) of the local keys.
localkey_fifths_durations = keys.groupby(['localkey_fifths', 'localkey_is_minor'])['duration_qb'].sum()


def _summed_duration_of_fifths(durations):
    """Map each entry to the duration of its fifths position summed over both modes."""
    totals_per_fifths = durations.groupby(level=0).sum()
    return durations.index.get_level_values(0).map(totals_per_fifths)


# Order the fifths positions by the descending combined duration of both modes.
localkey_fifths_durations = localkey_fifths_durations.sort_values(
    key=_summed_duration_of_fifths,
    ascending=False,
)
bar_data = replace_boolean_mode_by_strings(localkey_fifths_durations.reset_index())
# Replace the fifths integers by interval names for the x-axis.
bar_data['localkey_fifths'] = bar_data['localkey_fifths'].map(ms3.fifths2iv)
fig = px.bar(
    bar_data,
    x='localkey_fifths',
    y='duration_qb',
    color='localkey_mode',
    log_y=True,
    barmode='group',
    labels={
        'localkey_fifths': 'Roots of local keys as intervallic distance from the global tonic',
        'duration_qb': 'total duration in quarter notes',
        'localkey_mode': 'mode',
    },
    color_discrete_sequence=corpus_color_scale,
    width=1000,
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "key_segments.png"), scale=2)
fig.show()
# Summed duration per (line-of-fifths position, mode) of the local keys.
# No sorting is applied here: the bars keep the order produced by the groupby.
localkey_fifths_durations = keys.groupby(['localkey_fifths', 'localkey_is_minor'])['duration_qb'].sum()
bar_data = replace_boolean_mode_by_strings(localkey_fifths_durations.reset_index())
# Replace the fifths integers by interval names for the x-axis.
bar_data['localkey_fifths'] = bar_data['localkey_fifths'].map(ms3.fifths2iv)
fig = px.bar(
    bar_data,
    x='localkey_fifths',
    y='duration_qb',
    color='localkey_mode',
    log_y=True,
    barmode='group',
    labels={
        'localkey_fifths': 'Roots of local keys as intervallic distance from the global tonic',
        'duration_qb': 'total duration in quarter notes',
        'localkey_mode': 'mode',
    },
    color_discrete_sequence=corpus_color_scale,
    width=1000,
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "key_segments_line_of_fifths.png"), scale=2)
fig.show()
# Combine both modes per fifths position (ascending by duration) and show each
# position's share of the overall duration next to its absolute duration.
localkey_fifths_durations_stacked = localkey_fifths_durations.groupby(level=0).sum().sort_values()
pd.concat(
    [
        localkey_fifths_durations_stacked,
        (localkey_fifths_durations_stacked / localkey_fifths_durations_stacked.sum()).rename('fraction'),
    ],
    axis=1,
)
duration_qb fraction
localkey_fifths
10 24.0 0.000263
-9 24.0 0.000263
7 41.0 0.000449
-11 64.5 0.000706
6 67.5 0.000739
9 70.5 0.000772
-7 134.0 0.001467
-8 157.0 0.001718
8 161.0 0.001762
-6 371.0 0.00406
5 504.0 0.005516
2 566.083333 0.006196
-5 587.75 0.006433
-2 1186.5 0.012986
3 1453.208333 0.015905
4 2347.458333 0.025692
-1 2593.0 0.028379
-4 2995.625 0.032786
-3 4728.25 0.051749
1 6084.625 0.066594
0 67208.25 0.735567
# Inspect the key segments lying 9 fifths below the global tonic.
keys.loc[keys['localkey_fifths'] == -9]
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend localkey_fifths
corpus fname localkey_slice
liszt_pelerinage 161.01_Sposalizio [362.0, 386.0) 62 62 362 24.0 0 0 6/4 2 1 bbVII.V(+964) NaN E bbVII NaN V(+964) NaN V NaN NaN +964 NaN NaN NaN M False False (1, 0, 4) (3,) 1 1 NaN NaN -9
# Inspect the key segments lying 10 fifths above the global tonic.
keys.loc[keys['localkey_fifths'] == 10]
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend localkey_fifths
corpus fname localkey_slice
liszt_pelerinage 161.04_Sonetto_47_del_Petrarca [140.0, 164.0) 28 28 140 24.0 0 0 6/4 2 1 #VI.viio43{ NaN Db #VI NaN viio43 NaN vii o 43 NaN NaN NaN { o7 False False (-1, -4, 5, 2) () 5 -1 NaN NaN 10

Cadences

# Number of occurrences of each cadence label in the whole dataset.
all_annotations['cadence'].value_counts()
PAC    1289
HC      895
IAC     631
PC       61
EC       47
DC       30
Name: cadence, dtype: int64
# Number of occurrences of each cadence label, broken down by corpus.
all_annotations.groupby("corpus_name")['cadence'].value_counts()
corpus_name            cadence
Beethoven Sonatas      PAC        603
                       HC         406
                       IAC        275
                       EC          31
                       DC          15
                       PC           3
Chopin Mazurkas        PAC        197
                       HC          64
                       IAC         56
                       PC           8
                       EC           3
                       DC           1
Debussy Suite          HC           8
                       IAC          8
                       PAC          7
                       DC           3
                       PC           2
                       EC           1
Dvořák Silhouettes     IAC         49
                       PAC         46
                       HC          32
                       DC           6
                       EC           3
                       PC           3
Grieg Lyric Pieces     PAC        190
                       HC         147
                       IAC         65
                       PC          23
                       DC           4
Liszt Années           PAC         89
                       HC          88
                       IAC         74
                       PC          13
                       EC           8
Medtner Tales          PAC         76
                       HC          52
                       IAC         29
                       PC           3
Schumann Kinderszenen  HC          32
                       PAC         30
                       IAC         14
                       EC           1
Tchaikovsky Seasons    HC          66
                       IAC         61
                       PAC         51
                       PC           6
                       DC           1
Name: cadence, dtype: int64
# Cadence counts per (corpus, cadence type), sorted by descending count ...
cadence_count_per_corpus = (
    all_annotations
    .groupby("corpus_name")['cadence']
    .value_counts()
    .sort_values(ascending=False)
)
# ... and the total number of cadence labels per corpus.
cadence_count_per_corpus.groupby(level=0).sum()
corpus_name
Beethoven Sonatas        1333
Chopin Mazurkas           329
Debussy Suite              29
Dvořák Silhouettes        139
Grieg Lyric Pieces        429
Liszt Années              272
Medtner Tales             160
Schumann Kinderszenen      77
Tchaikovsky Seasons       185
Name: cadence, dtype: int64
# Normalize the cadence counts by the total number of cadences of the respective
# corpus (the division aligns on the shared 'corpus_name' index level).
cadence_fraction_per_corpus = cadence_count_per_corpus / cadence_count_per_corpus.groupby(level=0).sum()
fig = px.bar(
    # The values are fractions, so the column is named accordingly; this also
    # yields the intended y-axis title without a label override.
    cadence_fraction_per_corpus.rename('fraction').reset_index(),
    x='corpus_name',
    y='fraction',
    color='cadence',
    # Suppress the x-axis title. The key has to match the plotted column
    # 'corpus_name'; the previous key 'corpus' matched no column and was ignored.
    labels={'corpus_name': ''},
    height=400,
    width=900,
    category_orders={'corpus_name': chronological_corpus_names},
    # color_discrete_map=cadence_colors,  # optional fixed colour per cadence type
)

fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "cadences.png"), scale=2)
fig.show()

Harmony labels

Unigrams

To compute unigram statistics, the tokens need to be grouped according to whether they occur within a major or a minor key, because the same chord symbol has a different meaning in each mode. To that end, the annotated corpus is sliced into contiguous localkey segments, which are then grouped into a major (is_minor=False) and a minor (is_minor=True) group.

# Summed duration per (root, chord type), restricted to roots between -5 and 6
# (inclusive) on the line of fifths.
root_in_range = all_chords['root'].between(-5, 6)
root_durations = all_chords[root_in_range].groupby(['root', 'chord_type'])['duration_qb'].sum()
# Optional: sort by stacked bar length instead of keeping the groupby order.
#root_durations = root_durations.sort_values(key=lambda S: S.index.get_level_values(0).map(S.groupby(level=0).sum()), ascending=False)
bar_data = root_durations.reset_index()
# Replace the fifths integers by interval names for the x-axis.
bar_data['root'] = bar_data['root'].map(ms3.fifths2iv)
px.bar(bar_data, x='root', y='duration_qb', color='chord_type')
# Work on a copy of the columns needed to compute chord roots relative to the
# local or secondary (applied) tonic.
relative_roots = all_chords[['numeral', 'duration_qb', 'relativeroot', 'localkey_is_minor', 'chord_type']].copy()
relative_roots['relativeroot_resolved'] = transform(
    relative_roots,
    ms3.resolve_relative_keys,
    ['relativeroot', 'localkey_is_minor'],
)
# Where a relative root is present, the mode used for interpreting the numeral
# is taken from the case of the resolved relative root (lowercase -> minor).
has_rel = relative_roots['relativeroot_resolved'].notna()
relative_roots.loc[has_rel, 'localkey_is_minor'] = relative_roots.loc[has_rel, 'relativeroot_resolved'].str.islower()
relative_roots['root'] = transform(
    relative_roots,
    roman_numeral2fifths,
    ['numeral', 'localkey_is_minor'],
)
# Chord types occurring fewer than 500 times are subsumed under 'other'.
chord_type_frequency = all_chords['chord_type'].value_counts()
replace_rare = ms3.map_dict(
    dict.fromkeys(chord_type_frequency[chord_type_frequency < 500].index, 'other')
)
relative_roots['type_reduced'] = relative_roots['chord_type'].map(replace_rare)
#is_special = relative_roots.chord_type.isin(('It', 'Ger', 'Fr'))
#relative_roots.loc[is_special, 'root'] = -4
root_durations = (
    relative_roots
    .groupby(['root', 'type_reduced'])['duration_qb']
    .sum()
    .sort_values(ascending=False)
)
bar_data = root_durations.reset_index()
bar_data['root'] = bar_data['root'].map(ms3.fifths2iv)
# Roots are ordered along the x-axis by their descending total duration.
root_order = bar_data.groupby('root')['duration_qb'].sum().sort_values(ascending=False).index.to_list()
type_colors = dict(zip(
    ('Mm7', 'M', 'o7', 'o', 'mm7', 'm', '%7', 'MM7', 'other'),
    colorlover.scales['9']['qual']['Paired'],
))
fig = px.bar(
    bar_data,
    x='root',
    y='duration_qb',
    color='type_reduced',
    barmode='group',
    log_y=True,
    color_discrete_map=type_colors,
    category_orders={
        'root': root_order,
        'type_reduced': relative_roots['type_reduced'].value_counts().index.to_list(),
    },
    labels={
        'root': "intervallic difference between chord root to the local or secondary tonic",
        'duration_qb': "duration in quarter notes",
        'type_reduced': "chord type",
    },
    width=1000,
    height=400,
)
fig.update_layout(
    **STD_LAYOUT,
    legend={
        'orientation': 'h',
        'xanchor': "right",
        'x': 1,
        'y': 1,
    },
)
fig.update_yaxes(gridcolor='lightgrey')
fig.write_image(os.path.join(OUTPUT_DIR, "chord_roots.png"), scale=2)
fig.show()
# Counts the distinct (root, reduced chord type) combinations in the plot data.
print(f"Reduced to {len(set(bar_data.iloc[:,:2].itertuples(index=False, name=None)))} types. Paper cites the sum of types in major and types in minor (see below), treating them as distinct.")
Reduced to 123 types. Paper cites the sum of types in major and types in minor (see below), treating them as distinct.
# Share of the total duration that falls on roots whose interval name starts
# with "a" or "d" (reported below as augmented or diminished scale degrees).
starts_with_a = bar_data['root'].str.startswith("a")
starts_with_d = bar_data['root'].str.startswith("d")
dim_or_aug = bar_data.loc[starts_with_a | starts_with_d, 'duration_qb'].sum()
complete = bar_data['duration_qb'].sum()
print(f"On diminished or augmented scale degrees: {dim_or_aug} / {complete} = {dim_or_aug / complete}")
On diminished or augmented scale degrees: 371.50000000000017 / 91369.25 = 0.0040659193328171144
# Group the local-key segments with dimcat's ModeGrouper; the resulting slice
# info is indexed by 'localkey_is_minor' (False = major, True = minor).
mode_slices = dc.ModeGrouper().process_data(keys_segmented)

Whole dataset

# Display one row per local-key segment of the mode-grouped data.
mode_slices.get_slice_info()
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
localkey_is_minor corpus fname localkey_slice
False beethoven_piano_sonatas 01-1 [41.0, 209.0) 12 11 41 168.0 0 0 2/2 2 1 III.IVM2 ii7(2) f III NaN IVM2 NaN IV M 2 NaN NaN NaN NaN MM7 True False (4, -1, 3, 0) () -1 4 NaN NaN
[284.0, 307.0) 73 71 284 23.0 3/4 3/4 2/2 2 1 III.iio64 NaN f III NaN iio64 NaN ii o 64 NaN NaN NaN NaN o True False (-4, 2, -1) () 2 -4 NaN NaN
01-2 [0.0, 48.0) 1 0 0 48.0 0 1/2 3/4 2 1 F.V{ NaN F I NaN V NaN V NaN NaN NaN NaN NaN { M False False (1, 5, 2) () 1 1 NaN NaN
[61.0, 91.5) 22 21 61 30.5 0 0 3/4 2 1 V.V7 NaN F V NaN V7 NaN V NaN 7 NaN NaN NaN NaN Mm7 False False (1, 5, 2, -1) () 1 1 NaN NaN
[91.5, 183.0) 32 31 183/2 91.5 1/8 1/8 3/4 2 1 I.V7{ NaN F I NaN V7 NaN V NaN 7 NaN NaN NaN { Mm7 False False (1, 5, 2, -1) () 1 1 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
True tchaikovsky_seasons op37a11 [155.0, 166.0) 39 39 155 11.0 3/4 3/4 4/4 2 1 iv.V NaN E iv NaN V NaN V NaN NaN NaN NaN NaN NaN M False True (1, 5, 2) () 1 1 NaN NaN
[171.0, 180.0) 43 43 171 9.0 3/4 3/4 4/4 2 1 v.ii%43 NaN E v NaN ii%43 NaN ii % 43 NaN NaN NaN NaN %7 False True (-4, 0, 2, -1) () 2 -4 NaN NaN
[224.0, 232.0) 57 57 224 8.0 0 0 4/4 2 1 iii.V(64) NaN E iii NaN V(64) NaN V NaN NaN 64 NaN NaN NaN M False True (1, 0, -3) () 1 1 NaN NaN
op37a12 [303.0, 309.0) 102 102 303 6.0 0 0 3/4 2 1 iii/bVI.V(#2) NaN Ab iii/bVI NaN V(#2) NaN V NaN NaN #2 NaN NaN NaN M False True (1, 3, 2) () 1 1 NaN NaN
[399.0, 442.0) 134 134 399 43.0 0 0 3/4 2 1 iii/bVI.V7(#2) NaN Ab iii/bVI v V7(#2) NaN V NaN 7 #2 NaN NaN NaN Mm7 False True (1, 3, 2, -1) () 1 1 NaN NaN

1477 rows × 32 columns

# Count chord-symbol unigrams over the mode-grouped key segments and display
# the counts (one row per mode group, one column per chord symbol).
unigrams = dc.ChordSymbolUnigrams().process_data(mode_slices)
unigrams.get_group_results()
I V7 V I6 IV V(64) V43 V2 V6 vi ii ii6 V65 IV6 I64 V7(9) viio6 V7/V ii7 V7/IV V7(6) V64 ii65 vi6 iii viio i vi7 V65/V V7(4) I(9) viio7/V V/vi V(4) I(4) viio7 V2/V iii6 viio43 iv V2/IV V7/ii V43/V V7/vi IVM7 IV64 V(6) iv6 I/V viio2 ... V6(#6) V6(#6)/ii/III V6(#96)/vi/III V6(11) V6(11)/V V6(9)/iv V6(b5)/V V6/#iv V7(#2)/bII V7(#62)/iv VI(13) V7(94)/iv V7(96)/VI V7(96)/VII V7(b2)/iv V7(b3)/#iii V7(b3)/ii V7(b5)/bii V7(b6)/VI V7(b9)/bII V7(b9)/ii/III V7(b9)/iii V7(b9b6)/III V7/#vii V7/III/iv V7/V/#iii V7/V/iii VI(#2) VI(#9) VI(11) V7(94)/bv V7(9+4)/VII V7(#94) V7(#96#4)/VI V7(#96) V7(+4)/V V7(+96)/III V7(+b3)/V V7(1394)/V V7(13b5) V7(2)/VII V7(2)/iv V7(6)/VI V7(6+4)/VI V7(64)/VII V7(64^2)/v V7(6^2)/III V7(6b2)/V V7(6b2)/bII V7(9)/bv
localkey_is_minor
False 5206 2579 2164 1753 970 707 669 645 636 602 559 545 534 467 448 366 346 329 278 230 200 194 191 184 180 178 175 163 157 146 143 137 129 129 127 122 121 117 117 113 110 109 107 107 104 104 99 95 92 89 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
True 235 1190 1410 47 84 462 304 216 356 42 45 19 229 38 23 92 11 137 18 20 99 153 13 18 12 7 2703 1 49 54 8 151 6 79 9 1 33 22 3 480 6 9 20 8 4 14 77 301 26 4 ... 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

2 rows × 3125 columns

# Human-readable names for the boolean mode groups.
modes = {False: 'MAJOR', True: 'MINOR'}
# Per mode: number of distinct chord symbols (types), overall number of
# occurrences (tokens), and the 20 first entries of the group's results.
for (is_minor,), ugs in unigrams.iter_group_results():
    print(f"{modes[is_minor]} UNIGRAMS\n{ugs.shape[0]} types, {ugs.sum()} tokens")
    top_twenty = ugs.head(20)
    print(top_twenty.to_string())
MAJOR UNIGRAMS
2008 types, 35438 tokens
I        5206
V7       2579
V        2164
I6       1753
IV        970
V(64)     707
V43       669
V2        645
V6        636
vi        602
ii        559
ii6       545
V65       534
IV6       467
I64       448
V7(9)     366
viio6     346
V7/V      329
ii7       278
V7/IV     230
MINOR UNIGRAMS
1717 types, 21317 tokens
i          2703
V          1410
V7         1190
i6          913
iv          480
V(64)       462
V6          356
VI          316
V43         304
iv6         301
i64         237
I           235
V65         229
V2          216
III         212
#viio43     209
bII         164
v           158
V7/III      158
#viio7      154

Per corpus

# Additionally group the mode-grouped segments by corpus before counting
# chord-symbol unigrams, then display one row of counts per (mode, corpus).
corpus_wise_unigrams = dc.Pipeline([dc.CorpusGrouper(), dc.ChordSymbolUnigrams()]).process_data(mode_slices)
corpus_wise_unigrams.get_group_results()
I V I6 V7 V6 V2 V43 V(64) IV ii6 V65 viio6 IV6 ii vi I64 viio i V65/V vi6 viio7/V iii6 V7/V V64 i6 ii65 V7/IV viio/V V2/IV V2/V V43/V viio43 V/vi V7(4) V(4) V6/V iii iv V/V viio6/V V65/IV viio64 V65/ii iv6 V7/ii #viio7/vi V43/IV viio7 V(b64) viio2 ... V7(6+4)/VI V64(#7b5) V6(9)/iv V6/bv V64/bIV V64(+6)/iv V64(4)/bII V64(9)/VII V6(11)/V V6(11) V6(#96)/vi/III V6(#6)/ii/III V6(#6) V43/ii/III V43/bIV V43/IV/III V43(b5)/VI V43(6)/III V7(6)/VI V7(+4)/V V7(#2)/bII V7(#5)/bII/iv V7(#96#4)/VI V7(+96)/III V7(+b3)/V V7(13b5) V7(2)/VII V7(#11#96)/VI V65/V/III V65(b9)/VII V65(b9)/VI V65(9)/ii/III V65(9)/VII V65(13)/III V65(+6) #viio6/V #viio6(2)/V V7(13)/III iv64(#7) iio(9) i64(2) #viio65(2)/V i64(^2) V(9#764) i6(6#4) iv64(94) i(+4) i(97) V2(6)/bII i(4)/v
localkey_is_minor corpus
False beethoven_piano_sonatas 2208 1178 1122 1027 466 443 440 437 394 391 335 274 263 235 226 207 139 131 107 93 86 81 77 75 72 71 69 66 61 58 57 57 55 51 49 48 48 47 47 47 46 44 43 43 42 42 42 39 38 38 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chopin_mazurkas 954 251 67 656 25 36 18 111 183 66 48 6 48 74 75 49 16 15 24 24 10 3 133 27 3 30 63 9 1 30 6 11 22 10 23 4 46 10 4 6 5 5 3 16 42 11 3 39 4 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
debussy_suite_bergamasque 24 4 9 21 4 6 3 0 10 3 5 6 12 10 4 3 0 0 0 2 0 7 1 4 0 9 1 0 0 0 0 0 0 2 0 0 19 0 1 0 0 0 0 0 0 1 5 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dvorak_silhouettes 254 79 103 89 8 32 12 9 46 5 9 29 7 15 20 2 10 8 7 8 0 2 7 1 2 3 6 0 2 3 5 6 4 3 2 0 4 5 4 0 1 0 0 2 3 2 0 4 0 4 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
grieg_lyric_pieces 566 224 116 264 25 39 78 26 127 14 31 10 45 53 116 68 6 8 2 13 1 5 57 41 0 31 20 0 16 17 24 5 21 14 20 0 33 12 9 1 0 1 0 9 8 2 11 7 0 3 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
liszt_pelerinage 501 155 109 250 30 23 19 46 108 28 35 1 38 70 85 43 1 8 2 12 23 3 21 9 3 6 35 2 9 7 8 15 6 36 11 2 5 21 3 6 0 1 2 18 6 8 2 30 4 13 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
medtner_tales 263 139 83 122 24 21 15 11 61 19 12 14 27 52 44 37 4 5 7 23 3 11 26 13 6 24 23 2 10 3 1 2 18 17 7 0 12 11 8 4 0 0 4 6 4 2 3 0 0 6 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
schumann_kinderszenen 106 65 69 50 5 10 30 22 23 10 19 4 10 13 9 7 2 0 0 4 8 0 7 10 0 8 9 0 2 0 2 0 3 3 2 0 7 1 4 0 2 0 1 1 2 1 2 2 0 2 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tchaikovsky_seasons 330 69 75 100 49 35 54 45 18 9 40 2 17 37 23 32 0 0 8 5 6 5 0 14 1 9 4 5 9 3 4 21 0 10 15 3 6 6 3 1 0 0 0 0 2 5 6 1 0 23 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
True beethoven_piano_sonatas 121 584 33 389 192 91 151 260 22 12 118 7 14 24 24 4 6 857 31 13 86 12 8 46 350 3 4 24 4 8 9 2 4 10 15 11 8 201 11 10 3 0 0 120 2 2 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chopin_mazurkas 46 249 3 339 32 26 44 49 6 1 25 0 3 1 0 1 0 511 10 3 20 2 77 29 60 5 12 0 0 9 1 0 0 9 15 2 1 59 36 2 0 0 0 54 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
debussy_suite_bergamasque 7 9 2 8 0 4 8 0 15 0 3 0 2 7 3 2 0 57 5 1 0 1 1 0 16 1 0 1 1 1 2 0 0 1 0 0 2 15 1 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dvorak_silhouettes 5 42 0 50 10 4 1 20 0 0 3 3 1 0 0 0 0 101 0 0 1 0 4 11 34 0 0 4 0 0 0 0 0 0 0 1 0 20 2 0 0 0 0 8 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
grieg_lyric_pieces 26 211 1 110 16 22 34 6 22 0 2 0 8 5 1 6 0 380 0 0 6 2 22 32 82 0 2 0 0 7 0 0 1 2 23 0 0 58 16 0 0 0 7 38 3 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
liszt_pelerinage 21 99 5 60 37 8 19 44 10 3 13 0 1 2 10 10 0 159 1 0 11 2 4 13 100 2 2 7 1 4 3 1 0 1 5 2 0 19 0 0 0 0 1 25 3 0 3 1 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
medtner_tales 5 131 3 109 38 32 23 36 9 3 9 1 3 6 4 0 1 346 2 1 21 3 19 14 175 2 0 2 0 2 5 0 1 29 15 2 1 77 9 1 0 0 1 50 1 1 0 0 0 2 ... 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tchaikovsky_seasons 4 72 0 110 21 29 24 36 0 0 53 0 6 0 0 0 0 247 0 0 6 0 1 8 87 0 0 4 0 2 0 0 0 2 6 0 0 22 0 2 0 0 0 6 0 0 0 0 0 2 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 2 2 2 2 1 1
schumann_kinderszenen 0 13 0 15 10 0 0 11 0 0 3 0 0 0 0 0 0 45 0 0 0 0 1 0 9 0 0 0 0 0 0 0 0 0 0 0 0 9 0 1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 2 1 1 1 1 0 0 0 0 0 0 0 0

18 rows × 3125 columns

# Per (mode, corpus): numbers of types and tokens, followed by the first five
# entries of the group's results.
for (is_minor, corpus_name), ugs in corpus_wise_unigrams.iter_group_results():
    print(f"{corpus_name} {modes[is_minor]} unigrams ({ugs.shape[0]} types, {ugs.sum()} tokens)")
    top_five = ugs.head(5)
    print(top_five.to_string())
beethoven_piano_sonatas MAJOR unigrams (839 types, 15130 tokens)
I     2208
V     1178
I6    1122
V7    1027
V6     466
chopin_mazurkas MAJOR unigrams (460 types, 5257 tokens)
I       954
V7      656
V       251
IV      183
V7/V    133
debussy_suite_bergamasque MAJOR unigrams (94 types, 335 tokens)
I         24
V7        21
iii       19
V7(+2)    12
IV6       12
dvorak_silhouettes MAJOR unigrams (140 types, 1067 tokens)
I     254
I6    103
V7     89
V      79
IV     46
grieg_lyric_pieces MAJOR unigrams (659 types, 4882 tokens)
I     566
V7    264
V     224
IV    127
I6    116
liszt_pelerinage MAJOR unigrams (552 types, 3455 tokens)
I     501
V7    250
V     155
I6    109
IV    108
medtner_tales MAJOR unigrams (636 types, 2864 tokens)
I     263
V     139
V7    122
I6     83
IV     61
schumann_kinderszenen MAJOR unigrams (114 types, 714 tokens)
I      106
I6      69
V       65
V7      50
V43     30
tchaikovsky_seasons MAJOR unigrams (194 types, 1734 tokens)
I      330
V7     100
I6      75
V       69
V43     54
beethoven_piano_sonatas MINOR unigrams (498 types, 6379 tokens)
i        857
V        584
V7       389
i6       350
V(64)    260
chopin_mazurkas MINOR unigrams (388 types, 3700 tokens)
i       511
V7      339
V       249
III      82
V7/V     77
debussy_suite_bergamasque MINOR unigrams (228 types, 678 tokens)
i         57
III       23
V7/III    19
VI        17
i6        16
dvorak_silhouettes MINOR unigrams (69 types, 459 tokens)
i        101
V7        50
V         42
i6        34
V(64)     20
grieg_lyric_pieces MINOR unigrams (506 types, 3259 tokens)
i     380
V     211
V7    110
i6     82
iv     58
liszt_pelerinage MINOR unigrams (322 types, 1581 tokens)
i        159
i6       100
V         99
V7        60
V(64)     44
medtner_tales MINOR unigrams (827 types, 3806 tokens)
i     346
i6    175
V     131
V7    109
iv     77
schumann_kinderszenen MINOR unigrams (50 types, 197 tokens)
i        45
V7       15
V        13
V(64)    11
V6       10
tchaikovsky_seasons MINOR unigrams (125 types, 1258 tokens)
i      247
V7     110
i6      87
V       72
V65     53
# For each mode, intersect the chord-symbol vocabularies of all corpora.
# Keys are 1-tuples (is_minor,) so that they match the keys of the whole-dataset
# unigram table used for sorting below.
types_shared_between_corpora = {}
for (is_minor, corpus_name), ugs in corpus_wise_unigrams.iter_group_results():
    mode_key = (is_minor, )
    shared_so_far = types_shared_between_corpora.get(mode_key)
    if shared_so_far is None:
        # First corpus of this mode: start from its complete vocabulary.
        types_shared_between_corpora[mode_key] = set(ugs.index)
    else:
        types_shared_between_corpora[mode_key] = shared_so_far.intersection(ugs.index)
# {chord symbol -> {mode key -> count in the whole dataset}}
sorted_unigrams = unigrams.get_group_results().to_dict()


def _by_global_frequency(chord_types, mode_key):
    """Sort chord symbols by their descending count in the given mode of the whole dataset."""
    return sorted(
        chord_types,
        key=lambda chord: sorted_unigrams[chord][mode_key],
        reverse=True,
    )


types_shared_between_corpora = {
    k: _by_global_frequency(v, k) for k, v in types_shared_between_corpora.items()
}
n_types = {k: len(v) for k, v in types_shared_between_corpora.items()}
print(f"Chords which occur in all corpora, sorted by descending global frequency:\n{types_shared_between_corpora}\nCounts: {n_types}")
Chords which occur in all corpora, sorted by descending global frequency:
{(False,): ['I', 'V7', 'V', 'I6', 'IV', 'V43', 'V2', 'V6', 'vi', 'ii', 'ii6', 'V65', 'IV6', 'I64', 'viio6', 'ii7', 'V7/IV', 'V64', 'ii65', 'vi6', 'iii', 'V7(4)', 'V/V', 'ii2', 'ii64', '#viio7/vi'], (True,): ['i', 'V', 'V7', 'i6', 'iv', 'VI', 'V65', '#viio43', 'V7/V', 'iv64']}
Counts: {(False,): 26, (True,): 10}

Per piece

# Additionally group the mode-grouped segments by piece before counting
# chord-symbol unigrams, then display one row of counts per (mode, fname).
piece_wise_unigrams = dc.Pipeline([dc.PieceGrouper(), dc.ChordSymbolUnigrams()]).process_data(mode_slices)
piece_wise_unigrams.get_group_results()
V7 I6 I V(64) ii6 V ii6(2) V7(+b9) V65/V viio7/V I64 V7(b9) viio43 V43 V2 V6 V43/V IVM2 IV(0) V7(b2) I(974) V65 ii7 iii iii(0) iio64 viio6 viio64 IV V7(4) IV6 iii6 I(64) V2(4) IV(94) V6/V I(4) V(9) I(94) I(+4) I6(2) ii ii(9) I6(6#4#2) ii65 I(97b64) vii%43 viio IV6(+2) vi6 ... I(7)/V #vii%65(b3)/V/V #vii%65/V/V I(94)/V V7(b5)/ii Fr6/#iii #VI+6 ii%2(6) ii2(6)/III IIIM43 Ger2(2) i(b964) i64(7)/#vii viio2/#VI V+2/bII/iii bII6/iii V+2/bII V64(b5) i(#11^9) viio2/#IV V7/#IV i64(7)/iv iv7/IV i64(7)/vi V64(#6) ii%7(4)/v i6(6)/v bII7 Fr2/iv VII(+4) #vi%7(b4) ii%43(+6) Ger6(4)/V IV/vii I(4)/VI iv(+#7) V(b5+4)/V i%65 v6(4) IV/#vi i/#vi vi6/III vii%43/V/III V64(#7) V/#vi vi%7/#vi vi%7/bv vi%7/iii ii%7/bV v6/vii
localkey_is_minor fname
False 01-1 11 8 7 5 5 5 3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01-2 19 17 31 15 9 10 0 0 0 0 1 0 4 0 10 0 0 0 0 0 4 3 0 0 0 0 0 5 5 9 7 5 5 4 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01-3 8 8 16 3 9 1 0 0 0 0 0 0 0 3 0 7 0 0 0 0 0 1 0 0 0 0 4 0 1 0 1 5 0 0 0 0 2 0 0 0 0 3 0 0 0 0 0 1 0 2 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01-4 4 2 11 2 4 2 0 0 0 0 1 0 0 3 2 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
02-1 20 32 76 3 6 51 0 0 2 0 1 0 1 7 13 24 4 0 0 0 0 9 1 0 0 0 4 6 1 0 11 8 0 0 0 2 2 0 0 0 0 4 0 0 1 0 0 8 0 3 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
True 161.01_Sposalizio 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
BI145-1op50-1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
op42n02 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
op71n04 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23-2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

411 rows × 3125 columns

# For each mode (False = major, True = minor), intersect the chord-symbol
# vocabularies of all pieces, i.e. keep the symbols occurring in every piece.
types_shared_between_pieces = {}
# The second element of each group key is the piece's fname (the grouping is
# piece-wise), not a corpus name; it is not needed for the intersection.
for (is_minor, fname), ugs in piece_wise_unigrams.iter_group_results():
    if is_minor in types_shared_between_pieces:
        types_shared_between_pieces[is_minor] = types_shared_between_pieces[is_minor].intersection(ugs.index)
    else:
        # First piece of this mode: start from its complete vocabulary.
        types_shared_between_pieces[is_minor] = set(ugs.index)
print(types_shared_between_pieces)
{False: set(), True: set()}

Bigrams

Whole dataset

# Count chord-symbol bigrams (transitions from one chord symbol to the next)
# over the mode-grouped key segments and display the counts per mode group.
bigrams = dc.ChordSymbolBigrams().process_data(mode_slices)
bigrams.get_group_results()
V7 V I V2 V(64) I I6 V65 I V V43 I IV V6 I V V(64) I6 I IV V7 V7/V V7(6) V43 viio6 V ii6 I I(9) I6 V7(4) V7(9) I6 viio6 I64 I I(4) I6 I V2 V7/IV I6 V7 viio V6 I6 ... V7 V65/iv V7(#6)/V V7(#96#4)/VI V7(#94) V7(#9) V7(#62)/iv V7(#5)/bII/iv V7(#2)/bII V7(#11) V7(#11#96)/VI V7 V65/v V7 V65/vii V65/v V65/iv V7 V65/iv V7 vo7
I I V V7 I6 V7 I6 I I IV V7 I V43 I I V6 I6 V IV I64 V2 vi I6 V(64) V V7 I6 I V2 V(64) V65 I viio6 V7 V7 V43 I6 I ii6 IV6 I V2 viio6 I IV V V7(9) I V ii6 ... VIM7(+2) VIM7 VI(11) V7(6)/bII ii%65(9) iio6/V #viio7/iv iv7 V7(#11#96)/VI V7 V I(4) I V7(2)/iv bII/iv V2/bII V7 V7/VI iv6/IV V7(4)/VII V7(#96) V7(#94) V/v #ivo7 #iii6 vii(11) VM2(9)/iv V7(6)/v ii%43 V65(b9)/VI V7/iv V7/VI V6/iv V2(9)/III IV IIIM2/iv #vi%7 #vi2 #viio7(9) #viio7/#vi Ger6(b8) I(#7) III6(4) IV7(4) V(#6) V(#64) V(#7) V(4) V65(11)/bII i
localkey_is_minor
False 1244 581 441 402 344 309 294 292 261 244 232 218 210 164 163 159 147 141 137 135 132 125 122 118 117 116 115 113 112 111 111 110 109 107 98 96 94 94 93 92 85 85 81 81 79 78 78 77 76 76 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
True 42 35 22 21 6 150 10 1 17 4 141 4 4 2 6 7 2 139 6 2 2 1 1 61 34 65 3 1 38 0 11 8 4 26 20 5 2 2 4 0 8 1 2 5 4 0 16 4 44 0 ... 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

2 rows × 14058 columns

# Human-readable names for the boolean mode groups.
modes = {False: 'MAJOR', True: 'MINOR'}
# Per mode: number of distinct transitions (types), overall number of
# transitions (tokens), and the 20 first entries of the group's results.
# NOTE: the loop variable keeps the name `ugs` although it holds bigram counts here.
for (is_minor,), ugs in bigrams.iter_group_results():
    print(f"{modes[is_minor]} BIGRAMS\n{ugs.shape[0]} transition types, {ugs.sum()} tokens")
    top_twenty = ugs.head(20)
    print(top_twenty.to_string())
MAJOR BIGRAMS
8506 transition types, 34551 tokens
from   to
V7     I      1244
V      I       581
I      V       441
       V7      402
V2     I6      344
V(64)  V7      309
I      I6      294
I6     I       292
V65    I       261
I      IV      244
V      V7      232
V43    I       218
I      V43     210
IV     I       164
V6     I       163
I      V6      159
V      I6      147
V(64)  V       141
I6     IV      137
I      I64     135
MINOR BIGRAMS
6561 transition types, 20727 tokens
from   to
V7     i        577
V      i        420
i      V        210
       V7       196
       i6       174
V(64)  V7       150
i6     i        147
V      V7       141
V(64)  V        139
V43    i        133
V65    i        129
V6     i        118
i      V6        99
V2     i6        94
i      iv        88
       VI        78
       V43       75
V7(6)  V7        65
V7     V(64)     61
V(4)   V         58

Per corpus

# Group by corpus first, then run the bigram analysis on each group.
corpus_bigram_steps = [dc.CorpusGrouper(), dc.ChordSymbolBigrams()]
corpus_bigram_pipeline = dc.Pipeline(corpus_bigram_steps)
corpus_wise_bigrams = corpus_bigram_pipeline.process_data(mode_slices)
corpus_wise_bigrams.get_group_results()
V7 V V2 I V(64) I6 I V65 I V43 V I V(64) V6 I I6 V ii6 viio6 I V V43 I6 viio6 I I6 V7 V2 I6 IV I I6 viio V6 I ii6 V7(4) V7 V65/V I64 ii6 I IV6 V7/V iii6 V43 ii6 ... i(+4) iv64(4) ii%65(4) iv64 ii%2 iv viio2 V2/V V2/iv V43(+6)/iv V6 V65 V2 V V7(6) VI(4) V7(6#2) V7 i V7 #viio43 #viio2 ii%7 i iv6/v Ger6 Fr6/iv i6 V7/v V43(4)/V i/iv V6 i6(#76) i6 V65 V7 i/iv V43 i V/iv ii%65 V2 ii%65 V2 V2(6)/bII i(4)/v V2/bII ii%65(2)
I I I6 V V7 I I6 I V7 V43 I V7 V6 V I V2 viio6 I6 V(64) I V65 V2 I6 IV I6 IV ii6 V2 V(64) I V43 I6 viio6 ii6 I V I64 IV6 I6 V7 V V I V7 vi iii6 V ii6 V7 V ... i iv64 ii%65 V7(64) i(9) V43(+6)/iv #vi%2 V(#74) iv6(#72) V43/iv V65(6) i(9) i6(#72) V7(6#2) V65/iv i6(9) VI(6) V7 i(+4) V2(6) ii%65 i6(2) V7(#2) V7(2) #viio43(2) V/v VI I(4) V65(6) i(4)/v V6 v6/iv Fr6 i6(9) Fr6/iv i(#4) i(6) i6(#7) iv6/v VI(6) V7(#9) ii%65 ii%7(4) i6(#4) V43 i6(#76) V2/bII i/v ii%65(2) ii%65
localkey_is_minor corpus
False beethoven_piano_sonatas 525 318 238 228 212 212 186 175 167 167 160 138 124 118 115 107 103 100 95 92 91 91 88 81 81 81 75 68 68 68 67 67 67 66 61 59 57 52 50 47 44 43 43 41 41 40 39 39 39 36 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chopin_mazurkas 336 51 18 70 48 16 14 21 92 1 10 58 10 8 0 11 0 0 5 2 6 2 1 4 0 96 9 0 34 6 2 8 3 0 12 6 34 13 0 7 10 1 5 26 10 0 56 0 3 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
debussy_suite_bergamasque 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dvorak_silhouettes 64 37 29 17 6 16 21 2 45 5 3 2 3 0 6 8 6 7 0 9 2 0 1 16 5 3 0 14 0 3 0 11 7 0 2 0 0 1 0 3 1 0 0 0 2 0 6 0 3 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
grieg_lyric_pieces 102 37 8 39 10 4 13 6 30 3 16 11 2 0 6 0 0 2 0 3 1 3 3 6 2 18 4 0 9 1 6 20 0 2 0 2 19 8 0 11 3 0 25 2 26 0 6 0 5 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
liszt_pelerinage 97 47 6 44 10 22 21 14 42 4 4 8 5 4 12 1 0 21 4 0 2 2 4 8 0 23 1 3 4 2 2 5 0 6 0 6 11 5 1 26 11 0 13 1 37 0 0 0 0 3 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
medtner_tales 50 45 10 27 2 2 12 5 17 2 2 5 7 2 5 0 0 9 0 5 2 10 3 7 2 20 4 0 1 0 2 6 4 0 2 2 6 5 2 2 0 3 4 0 2 0 9 0 3 1 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
schumann_kinderszenen 26 17 10 4 14 12 20 16 5 15 2 4 0 6 1 0 0 2 4 0 0 4 7 8 4 3 0 0 0 0 13 2 0 2 0 0 2 2 0 3 0 0 0 0 1 0 1 0 0 3 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tchaikovsky_seasons 38 29 25 12 7 8 7 22 4 13 21 6 8 3 18 5 0 6 3 2 3 0 8 7 0 0 0 0 2 1 4 2 0 0 0 1 6 5 0 6 2 4 4 0 6 0 0 0 2 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
True beethoven_piano_sonatas 29 24 6 8 92 1 9 12 13 4 2 61 7 109 6 2 4 2 0 1 9 10 3 6 2 3 4 1 34 4 3 1 2 0 4 15 0 0 2 7 29 15 2 3 1 4 5 3 7 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
chopin_mazurkas 8 1 0 5 22 0 0 4 6 0 0 47 0 13 0 0 0 0 0 0 2 11 0 0 0 1 0 0 14 0 2 0 0 0 0 23 0 0 0 9 4 0 0 0 0 0 16 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
debussy_suite_bergamasque 1 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
liszt_pelerinage 4 8 0 9 9 0 1 0 2 0 2 4 0 5 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 1 0 0 0 0 0 3 0 0 1 0 1 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dvorak_silhouettes 0 2 0 0 7 0 0 0 0 0 0 6 0 5 0 0 0 0 0 0 0 4 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 4 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
grieg_lyric_pieces 0 0 0 0 2 0 0 1 0 0 0 6 0 2 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 2 0 0 0 0 0 0 6 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
medtner_tales 0 0 0 0 4 0 0 0 0 0 0 12 0 1 0 0 0 0 0 0 0 6 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 2 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
schumann_kinderszenen 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tchaikovsky_seasons 0 0 0 0 7 0 0 0 0 0 0 3 0 4 0 0 0 0 0 0 0 1 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 ... 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

18 rows × 14058 columns

# One headline plus the first five rows for every (mode, corpus) group.
for (is_minor, corpus_name), bigram_counts in corpus_wise_bigrams.iter_group_results():
    n_types = bigram_counts.shape[0]
    n_tokens = bigram_counts.sum()
    print(f"{corpus_name} {modes[is_minor]} bigrams ({n_types} transition types, {n_tokens} tokens)")
    print(bigram_counts.head(5).to_string())
beethoven_piano_sonatas MAJOR bigrams (3481 transition types, 14884 tokens)
from   to
V7     I     525
V      I     318
V2     I6    238
I      V     228
V(64)  V7    212
chopin_mazurkas MAJOR bigrams (1393 transition types, 5136 tokens)
from  to
V7    I     336
I     IV     96
      V7     92
      V      70
IV    I      67
debussy_suite_bergamasque MAJOR bigrams (221 transition types, 327 tokens)
from    to
I       iii      8
V7      I        6
iii     bIII6    5
iii6    V7       5
vii%65  iii6     4
dvorak_silhouettes MAJOR bigrams (347 transition types, 1013 tokens)
from  to
V7    I     64
I     V7    45
V     I     37
V2    I6    29
I     I6    21
grieg_lyric_pieces MAJOR bigrams (1735 transition types, 4755 tokens)
from   to
V7     I     102
I      V      39
V      I      37
V7(9)  I      31
I      V7     30
liszt_pelerinage MAJOR bigrams (1471 transition types, 3323 tokens)
from  to
V7    I     97
V     I     47
I     V     44
      V7    42
I(9)  I     41
medtner_tales MAJOR bigrams (1593 transition types, 2818 tokens)
from  to
V7    I     50
V     I     45
I     V     27
      IV    20
      V7    17
schumann_kinderszenen MAJOR bigrams (256 transition types, 676 tokens)
from  to
V7    I      26
I     I6     20
V     I      17
V65   I      16
I     V43    15
tchaikovsky_seasons MAJOR bigrams (532 transition types, 1619 tokens)
from    to
V7      I     38
V       I     29
V2      I6    25
I(6#4)  I     22
V65     I     22
beethoven_piano_sonatas MINOR bigrams (1785 transition types, 6227 tokens)
from   to
V      i     200
V7     i     160
V(64)  V     109
i      V      94
V(64)  V7     92
chopin_mazurkas MINOR bigrams (1129 transition types, 3625 tokens)
from  to
V7    i          173
i     V7          57
V     V7          47
      i           41
bII   V43/bII     32
debussy_suite_bergamasque MINOR bigrams (477 transition types, 675 tokens)
from    to
i(2)    i         8
i       i(2)      8
V7/III  III       6
III     V7/III    6
i43     V7/VII    5
dvorak_silhouettes MINOR bigrams (170 transition types, 427 tokens)
from  to
V7    i      35
V     i      18
i     V7     16
i6    V64     9
      iv      8
grieg_lyric_pieces MINOR bigrams (1218 transition types, 3178 tokens)
from  to
V     i     65
V7    i     59
i     V     31
      V7    27
V(4)  V     20
liszt_pelerinage MINOR bigrams (812 transition types, 1515 tokens)
from  to
V     i     36
i     V     27
V7    i     21
V6    i     19
i     i6    19
medtner_tales MINOR bigrams (2249 transition types, 3731 tokens)
from  to
V7    i       58
i     i6      41
i6    i       35
V     i       26
i     ii%7    20
schumann_kinderszenen MINOR bigrams (76 transition types, 175 tokens)
from   to
V7     i     11
i      V6    10
V      i      9
i      iv     8
V(64)  V7     7
tchaikovsky_seasons MINOR bigrams (310 transition types, 1174 tokens)
from  to
V7    i     58
V65   i     38
V     i     24
i     V7    23
      VI    22
def _as_rounded_percentages(counts):
    """Express ``counts`` as percentages of its own sum, rounded to one decimal."""
    return (100 * counts / counts.sum()).round(1)


# Per (mode, corpus) group: relative unigram frequencies in percent.
normalized_corpus_unigrams = {
    group: _as_rounded_percentages(ugs).rename("frequency")
    for group, ugs in corpus_wise_unigrams.iter_group_results()
}

# mode (is_minor) -> corpus name -> DataFrame combining the unigram frequency
# of each shared label with one outgoing transition and its percentage.
transitions_from_shared_types = {False: {}, True: {}}
for (is_minor, corpus_name), bgs in corpus_wise_bigrams.iter_group_results():
    shared_types = types_shared_between_corpora[(is_minor,)]

    # Normalize the counts separately for every antecedent ("from") label.
    per_from_percentages = bgs.groupby(level="from", group_keys=False).apply(_as_rounded_percentages)

    # Move the second index level into a column and keep the first row per
    # "from" label.
    # NOTE(review): this is the most frequent transition only if the bigram
    # results arrive sorted by descending count -- confirm.
    fractions = per_from_percentages.rename('fraction').reset_index(level=1)
    first_transition_per_from = fractions.groupby(level=0).nth(0)

    # Restrict both tables to the shared labels and put them side by side.
    transition_columns = first_transition_per_from.loc[shared_types]
    frequency_column = normalized_corpus_unigrams[(is_minor, corpus_name)].loc[shared_types]
    transitions_from_shared_types[is_minor][corpus_name] = pd.concat(
        [frequency_column, transition_columns], axis=1
    )

For the labels that the major segments of all corpora share, show

  • the unigram frequency of the given label in that corpus

  • the chord that the label transitions to most frequently

  • the share (in percent) of this transition among all transitions from the label in question

# Place the per-corpus tables for major segments side by side, with the corpus
# names as the outer column level.
major_transitions = transitions_from_shared_types[False]
pd.concat(major_transitions.values(), keys=major_transitions.keys(), axis=1)
beethoven_piano_sonatas chopin_mazurkas debussy_suite_bergamasque dvorak_silhouettes grieg_lyric_pieces liszt_pelerinage medtner_tales schumann_kinderszenen tchaikovsky_seasons
frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction
I 14.6 V 11.0 18.1 IV 11.0 7.2 iii 38.1 23.8 V7 19.7 11.6 V 8.1 14.5 V 9.8 9.2 V 10.6 14.8 I6 23.0 19.0 I(6#4) 8.6
V7 6.8 I 51.8 12.5 I 51.8 6.3 I 30.0 8.3 I 72.7 5.4 I 39.7 7.2 I 40.1 4.3 I 41.0 7.0 I 55.3 5.8 I 38.8
V 7.8 I 27.3 4.8 V7 23.2 1.2 IV 75.0 7.4 I 50.7 4.6 I 16.7 4.5 I 30.7 4.9 I 33.1 9.1 I 29.3 4.0 I 43.3
I6 7.4 I 19.1 1.3 I 23.9 2.7 vi64 11.1 9.7 I 16.8 2.4 vi 11.3 3.2 I 21.8 2.9 ii 16.9 9.7 V 21.7 4.3 I 12.5
IV 2.6 I6 17.1 3.5 I 36.6 3.0 iii 40.0 4.3 I6 24.4 2.6 I 17.3 3.1 I 34.6 2.1 I6 10.0 3.2 ii7 18.2 1.0 IV6 40.0
V43 2.9 I 36.4 0.3 I 55.6 0.9 V7 33.3 1.1 I 25.0 1.6 I 20.8 0.5 I 21.1 0.5 V7 25.0 4.2 V2(9) 26.7 3.1 I 39.6
V2 2.9 I6 54.2 0.7 I6 50.0 1.8 iii 66.7 3.0 I6 90.6 0.8 vi43 25.6 0.7 I6 27.3 0.7 I6 50.0 1.4 I6 100.0 2.0 I6 75.8
V6 3.1 I 24.8 0.5 IV6 28.0 1.2 IV6 50.0 0.7 I 75.0 0.5 I 24.0 0.9 I 40.0 0.8 I 20.8 0.7 V7 80.0 2.8 I 40.0
vi 1.5 ii6 10.3 1.4 IV6(11) 8.2 1.2 I6 25.0 1.9 V65/V 23.5 2.4 IV 23.3 2.5 vi(9) 16.7 1.5 #viio6/vi 7.0 1.3 ii6 37.5 1.3 bVI+ 17.4
ii 1.6 V7 9.8 1.4 V7 18.9 3.0 I+ 20.0 1.4 viio6 20.0 1.1 V64 15.1 2.0 I 18.6 1.8 ii7(9) 14.0 1.8 viio6 30.8 2.1 V7 17.6
ii6 2.6 V(64) 24.4 1.3 V7 39.4 0.9 viio6 66.7 0.5 I 50.0 0.3 V6 14.3 0.8 V(64) 14.3 0.7 ii7(9) 15.8 1.4 V(64) 40.0 0.5 V(64) 33.3
V65 2.2 I 52.9 0.9 I 43.8 1.5 iii64 40.0 0.8 V7 55.6 0.6 I 19.4 1.0 I 42.4 0.4 I 41.7 2.7 I 84.2 2.3 I 56.4
IV6 1.7 iii6 15.2 0.9 I 18.8 3.6 vi7 16.7 0.7 viio65/V 28.6 0.9 V7 20.0 1.1 bV6 10.8 0.9 vi2 14.8 1.4 viio2 20.0 1.0 I 53.3
I64 1.4 I 21.0 0.9 viio7(4) 25.0 0.9 IV(+6) 33.3 0.2 I6 100.0 1.4 I 37.3 1.2 I 31.7 1.3 I 10.8 1.0 V(4) 28.6 1.8 V7 25.8
viio6 1.8 I 33.9 0.1 I 50.0 1.8 IV64 33.3 2.7 V 31.0 0.2 I 30.0 0.0 iv6(b9) 100.0 0.5 I 35.7 0.6 I6 100.0 0.1 I 100.0
ii7 0.2 I6 41.7 0.6 V7 35.5 2.7 viio6 37.5 0.7 I 37.5 2.0 V7 15.1 1.0 V7 22.6 1.7 V7 20.8 1.4 V 30.0 0.3 V7(4) 80.0
V7/IV 0.5 IV 37.7 1.2 IV 45.9 0.3 ii65(2) 100.0 0.6 IV 40.0 0.4 IV 40.0 1.0 IV 23.5 0.8 ii%7/IV 13.0 1.3 IV 66.7 0.2 viio43 100.0
V64 0.5 I 37.5 0.5 v 15.4 1.2 I(72) 100.0 0.1 I 100.0 0.8 V 61.0 0.3 I 66.7 0.5 V 41.7 1.4 V64(6) 40.0 0.8 I 57.1
ii65 0.5 V65/V 18.6 0.6 I 36.7 2.7 ii43 22.2 0.3 I 100.0 0.6 ii7 45.2 0.2 ii7(9) 33.3 0.8 ii65(4) 16.7 1.1 V 25.0 0.5 I+6 88.9
vi6 0.6 V6 29.3 0.5 ii7 20.8 0.6 IV6 50.0 0.7 I 62.5 0.3 V6 15.4 0.3 ii(11) 27.3 0.8 iii2 13.0 0.6 V/V 100.0 0.3 iii 40.0
iii 0.3 V43 17.8 0.9 #viio(b3)/iii 11.4 5.7 bIII6 27.8 0.4 IV 50.0 0.7 iii7 23.3 0.1 vi 60.0 0.4 V6 16.7 1.0 vi6 57.1 0.3 iii6 33.3
V7(4) 0.3 V7 92.2 0.2 V7 70.0 0.6 V7 100.0 0.3 V7 100.0 0.3 V7 78.6 1.0 V7 72.2 0.6 I 35.3 0.4 V7 100.0 0.6 V7 60.0
V/V 0.3 V 31.9 0.1 V 100.0 0.3 V7 100.0 0.4 ii 100.0 0.2 I/V 33.3 0.1 I/V 100.0 0.3 V2 50.0 0.6 V 100.0 0.2 V7(+b9) 66.7
ii2 0.2 V6 20.0 0.1 ii2(9) 28.6 0.9 vii%7 33.3 0.2 I 100.0 0.5 V65 26.9 0.1 IV 33.3 0.1 ii43 66.7 0.1 ii65 100.0 0.5 V6 22.2
ii64 0.2 V7 25.7 0.2 V65 50.0 1.2 ii43 50.0 0.1 V65 100.0 0.1 IM7 57.1 0.0 v6 100.0 0.2 V6/ii 33.3 0.6 V65 100.0 0.4 viio2 40.0
#viio7/vi 0.3 vi 66.7 0.2 vi 27.3 0.3 IV6 100.0 0.2 vi 100.0 0.0 vi 100.0 0.2 IV64 37.5 0.1 I/IV 50.0 0.1 vi 100.0 0.3 vi 80.0

For the labels that the minor segments of all corpora share, show

  • the unigram frequency of the given label in that corpus

  • the chord that the label transitions to most frequently

  • the share (in percent) of this transition among all transitions from the label in question

# Place the per-corpus tables for minor segments side by side, with the corpus
# names as the outer column level.
# The column keys are taken from the minor-mode dict itself (not from the
# major-mode one), so the labels match the concatenated frames even if the two
# modes do not cover the same corpora in the same order.
minor_transitions = transitions_from_shared_types[True]
pd.concat(minor_transitions.values(), keys=minor_transitions.keys(), axis=1)
beethoven_piano_sonatas chopin_mazurkas debussy_suite_bergamasque dvorak_silhouettes grieg_lyric_pieces liszt_pelerinage medtner_tales schumann_kinderszenen tchaikovsky_seasons
frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction frequency to fraction
i 13.4 V 12.1 13.8 V7 12.3 8.4 i(2) 14.8 22.0 V7 20.3 11.7 V 9.5 10.1 V 19.3 9.1 i6 12.8 22.8 V6 31.2 19.6 V7 12.0
V 9.2 i 34.6 6.7 V7 19.2 1.3 V7 22.2 9.2 i 46.2 6.5 i 31.6 6.3 i 40.0 3.4 i 20.6 6.6 i 75.0 5.7 i 38.1
V7 6.1 i 41.6 9.2 i 52.0 1.2 i 25.0 10.9 i 72.9 3.4 i 53.6 3.8 i 35.6 2.9 i 53.7 7.6 i 73.3 8.7 i 54.2
i6 5.5 i 18.8 1.6 i 35.0 2.4 iv 12.5 7.4 V64 26.5 2.5 ii%7 8.8 6.3 V6 10.6 4.6 i 20.1 4.6 #viio65 33.3 6.9 i 12.9
iv 3.2 i6 12.6 1.6 i 28.8 2.2 i 13.3 4.4 i 40.0 1.8 V 10.7 1.2 #viio65 15.8 2.0 i6 9.2 4.6 V(64) 50.0 1.7 iio 20.0
VI 1.5 V 18.3 0.8 III 20.7 2.5 VI64 17.6 2.2 VI6 30.0 1.4 i 28.9 0.9 i 28.6 1.9 VIM7 10.0 0.5 V/III 100.0 2.5 iv65 37.5
V65 1.8 i 53.4 0.7 i 56.0 0.4 V43(2) 33.3 0.7 i 100.0 0.1 I 50.0 0.8 i 46.2 0.2 i6 33.3 1.5 i 100.0 4.2 i 71.7
#viio43 1.8 i6 31.3 0.3 i 44.4 0.1 iv 100.0 0.2 #viio65 100.0 0.6 #viio65 25.0 1.5 i6(#7) 18.2 0.4 I6 18.8 4.1 i6 62.5 1.0 i 54.5
V7/V 0.1 V 71.4 2.1 V7 28.6 0.1 ii%43/V 100.0 0.9 V 100.0 0.7 V7 33.3 0.3 V6 100.0 0.5 ii%43(11) 11.1 0.5 V 100.0 0.1 V(4) 100.0
iv64 0.4 #viio7 45.8 1.3 i 31.9 0.9 vii%2/III 33.3 0.2 iv 100.0 0.6 i 26.3 0.4 #viio43/iv 33.3 0.7 bII6/iv 10.7 0.5 #viio7 100.0 0.4 i 60.0

Per piece

# Group by piece first, then run the bigram analysis on each group.
piece_bigram_steps = [dc.PieceGrouper(), dc.ChordSymbolBigrams()]
piece_bigram_pipeline = dc.Pipeline(piece_bigram_steps)
piece_wise_bigrams = piece_bigram_pipeline.process_data(mode_slices)
piece_wise_bigrams.get_group_results()
WARNING:dimcat.data.base:ChordSymbolBigrams yielded no result for group (True, '23-2')
V(64) viio7/V ii6(2) V65/V V7(+b9) V7 ii6 V7 I V V6 V43/V V2 V7 I6 V7(b9) I6 ii6 viio43 I64 I6 iio64 viio6 ii7 V7 iii(0) viio64 V7(b2) I V7 IV I I(974) I6 I64 IV(0) V43 IVM2 V V(64) I V43 V7(4) V(64) I ... i/ii IV/v IV/ii V/v V/ii V2/V iv64 #vi V/iii VII bII V/bv v vi%7 vi%7/#vi vi%7/bv V/iii V/bv #viio2 V/#vi #viio2 #viio43 #viio65 #viio7 III IV V V(+4) vi%7/iii #VII6 V/V ii%7/bV bII64 Ger2/iv I64 v6 i(64) V(6) V7(b5) V(#7) i v6/vii V(964) ii%65 ii%7 ii%65
V7 V(64) ii6 V V7 I64 V65/V I viio7/V I6 I V I6 V7(b9) ii6 V7 V6 V(64) I6 V7(+b9) ii6(2) V7 iii(0) V43 V7(b2) V2 iii viio6 V7 V43 I(974) viio64 V65 ii6(2) I IV(0) viio43 I6 IV I6 ii7 V43/V V7(+b9) viio43 V2 V43/V I V7 V IV6 ... IV/ii i/v i/ii i/ii i/#vi iv64 V(+4) VI vi%7/iii #vi VI vi%7/bv #viio43 #viio2 #viio43 #viio65 #viio43 #viio2 #viio2 vi%7/#vi V/#vi V/iii V/bv V/#vi V/iii #viio7 V/bv VII bII III vi%7 III #viio7 ii%7/bV bII64 Ger2/iv I64 V7(b5) #VII6 iv6(4) I(6) V(#7) V(94) V7(b5) v6/vii iv6 ii%65 VIM43 V(964) #viio2
localkey_is_minor fname
False 01-1 4 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01-2 5 0 0 0 0 0 0 9 0 0 0 0 6 0 4 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 9 6 6 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01-3 3 0 0 0 0 0 0 6 0 0 0 0 0 0 4 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
01-4 2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
02-1 2 0 0 0 0 0 0 16 0 5 3 0 9 0 0 0 4 0 1 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 2 5 0 1 4 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
True op38n01 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
op71n04 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
161.01_Sposalizio 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
op12n01 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
op71n07 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 1

410 rows × 14058 columns